From 8f06ac496e524252b2f06bbf00cfade20a10ec6b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 4 Jun 2024 08:20:07 +1000 Subject: [PATCH 001/342] reworked collect outputs so callable's are used over simply copying value from input --- pydra/engine/specs.py | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index a2e3651779..691c6148c0 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -454,31 +454,20 @@ def collect_additional_outputs(self, inputs, output_dir, outputs): ) # assuming that field should have either default or metadata, but not both input_value = getattr(inputs, fld.name, attr.NOTHING) - if input_value is not attr.NOTHING: - if TypeParser.contains_type(FileSet, fld.type): - if input_value is not False: - label = f"output field '{fld.name}' of {self}" - input_value = TypeParser(fld.type, label=label).coerce( - input_value - ) - additional_out[fld.name] = input_value - elif ( - fld.default is None or fld.default == attr.NOTHING - ) and not fld.metadata: # TODO: is it right? - raise AttributeError("File has to have default value or metadata") + if fld.metadata and "callable" in fld.metadata: + fld_out = self._field_metadata(fld, inputs, output_dir, outputs) + elif fld.type in [int, float, bool, str, list]: + raise AttributeError(f"{fld.type} has to have a callable in metadata") + elif input_value: # Map input value through to output + fld_out = input_value elif fld.default != attr.NOTHING: - additional_out[fld.name] = self._field_defaultvalue(fld, output_dir) - elif fld.metadata: - if ( - fld.type in [int, float, bool, str, list] - and "callable" not in fld.metadata - ): - raise AttributeError( - f"{fld.type} has to have a callable in metadata" - ) - additional_out[fld.name] = self._field_metadata( - fld, inputs, output_dir, outputs - ) + fld_out = self._field_defaultvalue(fld, output_dir) + else: + raise AttributeError("File has to have default value or metadata") + if TypeParser.contains_type(FileSet, fld.type): + label = f"output field '{fld.name}' of {self}" + fld_out = TypeParser(fld.type, label=label).coerce(fld_out) + additional_out[fld.name] = fld_out return additional_out def generated_output_names(self, inputs, output_dir): From f06171df61cd9fae62193c2adfeac8875ffaa585 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Jun 2024 16:45:07 +1000 Subject: [PATCH 002/342] if bool field argstr includes a string template, do templating instead of just argstr --- pydra/engine/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/task.py b/pydra/engine/task.py index cb55d9e390..b4970aae2c 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -468,7 +468,7 @@ def _command_pos_args(self, field, root=None): cmd_el_str = cmd_el_str.strip().replace(" ", " ") if cmd_el_str != "": cmd_add += split_cmd(cmd_el_str) - elif field.type is bool: + elif field.type is bool and "{" not in argstr: # if value is simply True the original argstr is used, # if False, nothing is added to the command. 
if value is True: From 2b33473dccdcc889d1fea7288e7a45c5a8806f03 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:36:33 +1000 Subject: [PATCH 003/342] bugfix for argstr formatting for bool types --- pydra/engine/helpers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index e6eaa012ef..2aa88d9bc9 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -656,6 +656,7 @@ def argstr_formatting(argstr, inputs, value_updates=None): fld_attr = getattr(attrs.fields(type(inputs)), fld_name) if fld_value is attr.NOTHING or ( fld_value is False + and fld_attr.type is not bool and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) ): # if value is NOTHING, nothing should be added to the command From b17709199eddd2362153669bb029eb3f032e925b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 004/342] wrote cmd_out arg and unittest based on "ls" for shell_cmd --- pydra/mark/__init__.py | 3 +- pydra/mark/shell_commands.py | 156 ++++++++++++++++++++++++ pydra/mark/tests/test_shell_commands.py | 75 ++++++++++++ 3 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 pydra/mark/shell_commands.py create mode 100644 pydra/mark/tests/test_shell_commands.py diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py index 31e4cf832e..d4338cf621 100644 --- a/pydra/mark/__init__.py +++ b/pydra/mark/__init__.py @@ -1,3 +1,4 @@ from .functions import annotate, task +from .shell_commands import cmd_arg, cmd_out -__all__ = ("annotate", "task") +__all__ = ("annotate", "task", "cmd_arg", "cmd_out") diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py new file mode 100644 index 0000000000..62579ce543 --- /dev/null +++ b/pydra/mark/shell_commands.py @@ -0,0 +1,156 @@ +"""Decorators and helper functions to create ShellCommandTasks used in Pydra workflows""" +from __future__ import annotations +import typing as ty +import attrs + + +def cmd_arg( + help_string: str, + default: ty.Any = attrs.NOTHING, + argstr: str = None, + position: int = None, + mandatory: bool = False, + sep: str = None, + allowed_values: list = None, + requires: list = None, + xor: list = None, + copyfile: bool = None, + container_path: bool = False, + output_file_template: str = None, + output_field_name: str = None, + keep_extension: bool = True, + readonly: bool = False, + formatter: ty.Callable = None, +): + """ + Returns an attrs field with appropriate metadata for it to be added as an argument in + a Pydra shell command task definition + + Parameters + ------------ + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + argstr: str, optional + A flag or string that is used in the command before the value, e.g. -v or + -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, + the flag is used before every element if a list is provided as a value. If no + argstr is used the field is not part of the command. + position: int, optional + Position of the field in the command, could be nonnegative or negative integer. + If nothing is provided the field will be inserted between all fields with + nonnegative positions and fields with negative positions. + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + sep: str, optional + A separator if a list is provided as a value. + allowed_values: list, optional + List of allowed values for the field. 
+ requires: list, optional + List of field names that are required together with the field. + xor: list, optional + List of field names that are mutually exclusive with the field. + copyfile: bool, optional + If True, a hard link is created for the input file in the output directory. If + hard link not possible, the file is copied to the output directory, by default + it is False + container_path: bool, optional + If True a path will be consider as a path inside the container (and not as a + local path, by default it is False + output_file_template: str, optional + If provided, the field is treated also as an output field and it is added to + the output spec. The template can use other fields, e.g. {file1}. Used in order + to create an output specification. + output_field_name: str, optional + If provided the field is added to the output spec with changed name. Used in + order to create an output specification. Used together with output_file_template + keep_extension: bool, optional + A flag that specifies if the file extension should be removed from the field value. + Used in order to create an output specification, by default it is True + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + formatter: function, optional + If provided the argstr of the field is created using the function. This function + can for example be used to combine several inputs into one command argument. The + function can take field (this input field will be passed to the function), + inputs (entire inputs will be passed) or any input field name (a specific input + field will be sent). + """ + + metadata = { + "help_string": help_string, + "argstr": argstr, + "position": position, + "mandatory": mandatory, + "sep": sep, + "allowed_values": allowed_values, + "requires": requires, + "xor": xor, + "copyfile": copyfile, + "container_path": container_path, + "output_file_template": output_file_template, + "output_field_name": output_field_name, + "keep_extension": keep_extension, + "readonly": readonly, + "formatter": formatter, + } + + return attrs.field( + default=default, metadata={k: v for k, v in metadata.items() if v is not None} + ) + + +def cmd_out( + help_string: str, + mandatory: bool = False, + output_file_template: str = None, + output_field_name: str = None, + keep_extension: bool = True, + requires: list = None, + callable: ty.Callable = None, +): + """Returns an attrs field with appropriate metadata for it to be added as an output of + a Pydra shell command task definition + + Parameters + ---------- + help_string: str + A short description of the input field. The same as in input_spec. + mandatory: bool, default: False + If True the output file has to exist, otherwise an error will be raised. + output_file_template: str, optional + If provided the output file name (or list of file names) is created using the + template. The template can use other fields, e.g. {file1}. The same as in + input_spec. + output_field_name: str, optional + If provided the field is added to the output spec with changed name. The same as + in input_spec. Used together with output_file_template + keep_extension: bool, default: True + A flag that specifies if the file extension should be removed from the field + value. The same as in input_spec. + requires: list + List of field names that are required to create a specific output. 
The fields + do not have to be a part of the output_file_template and if any field from the + list is not provided in the input, a NOTHING is returned for the specific output. + This has a different meaning than the requires form the input_spec. + callable: Callable + If provided the output file name (or list of file names) is created using the + function. The function can take field (the specific output field will be passed + to the function), output_dir (task output_dir will be used), stdout, stderr + (stdout and stderr of the task will be sent) inputs (entire inputs will be + passed) or any input field name (a specific input field will be sent). + """ + metadata = { + "help_string": help_string, + "mandatory": mandatory, + "output_file_template": output_file_template, + "output_field_name": output_field_name, + "keep_extension": keep_extension, + "requires": requires, + "callable": callable, + } + + return attrs.field(metadata={k: v for k, v in metadata.items() if v is not None}) diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py new file mode 100644 index 0000000000..084f4464bb --- /dev/null +++ b/pydra/mark/tests/test_shell_commands.py @@ -0,0 +1,75 @@ +import os +import tempfile +from pathlib import Path +import attrs +import pydra.engine +from pydra.mark import cmd_arg, cmd_out + + +def test_shell_cmd(): + @attrs.define(kw_only=True, slots=False) + class LsInputSpec(pydra.specs.ShellSpec): + directory: os.PathLike = cmd_arg( + help_string="the directory to list the contents of", + argstr="", + mandatory=True, + ) + hidden: bool = cmd_arg(help_string=("display hidden FS objects"), argstr="-a") + long_format: bool = cmd_arg( + help_string=( + "display properties of FS object, such as permissions, size and timestamps " + ), + argstr="-l", + ) + human_readable: bool = cmd_arg( + help_string="display file sizes in human readable form", + argstr="-h", + requires=["long_format"], + ) + complete_date: bool = cmd_arg( + help_string="Show complete date in long format", + argstr="-T", + requires=["long_format"], + xor=["date_format_str"], + ) + date_format_str: str = cmd_arg( + help_string="format string for ", + argstr="-D", + requires=["long_format"], + xor=["complete_date"], + ) + + def list_outputs(stdout): + return stdout.split("\n")[:-1] + + @attrs.define(kw_only=True, slots=False) + class LsOutputSpec(pydra.specs.ShellOutSpec): + entries: list = cmd_out( + help_string="list of entries returned by ls command", callable=list_outputs + ) + + class Ls(pydra.engine.ShellCommandTask): + """Task definition for mri_aparc2aseg.""" + + executable = "ls" + + input_spec = pydra.specs.SpecInfo( + name="LsInput", + bases=(LsInputSpec,), + ) + + output_spec = pydra.specs.SpecInfo( + name="LsOutput", + bases=(LsOutputSpec,), + ) + + tmpdir = Path(tempfile.mkdtemp()) + Path.touch(tmpdir / "a") + Path.touch(tmpdir / "b") + Path.touch(tmpdir / "c") + + ls = Ls(directory=tmpdir) + + result = ls() + + assert result.output.entries == ["a", "b", "c"] From 5758eb3d8163c0d484188320c0736d3af0004968 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 005/342] added small note to docs --- docs/input_spec.rst | 11 ++++++----- pydra/mark/tests/test_shell_commands.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 48d66fd814..92e4c945e5 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -174,8 +174,9 @@ In the example we used multiple keys in the 
metadata dictionary including `help_ (a specific input field will be sent). -Validators ----------- -Pydra allows for using simple validator for types and `allowev_values`. -The validators are disabled by default, but can be enabled by calling -`pydra.set_input_validator(flag=True)`. +`cmd_arg` Function +------------------ + +For convenience, there is a function in `pydra.mark` called `cmd_arg()`, which will +takes the above metadata values as arguments and inserts them into the metadata passed +to `attrs.field`. This can be especially useful when using an IDE with code-completion. diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py index 084f4464bb..e6127edcde 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell_commands.py @@ -49,7 +49,7 @@ class LsOutputSpec(pydra.specs.ShellOutSpec): ) class Ls(pydra.engine.ShellCommandTask): - """Task definition for mri_aparc2aseg.""" + """Task definition for the `ls` command line tool""" executable = "ls" From 0f0485c87d2d49558bf5c61f00fcb1ee497a8ccc Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 006/342] renamed cmd_arg and cmd_out to shell_arg and shell_out --- docs/input_spec.rst | 4 ++-- pydra/mark/__init__.py | 4 ++-- pydra/mark/shell_commands.py | 4 ++-- pydra/mark/tests/test_shell_commands.py | 16 ++++++++-------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 92e4c945e5..559c2c1f66 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -174,9 +174,9 @@ In the example we used multiple keys in the metadata dictionary including `help_ (a specific input field will be sent). -`cmd_arg` Function +`shell_arg` Function ------------------ -For convenience, there is a function in `pydra.mark` called `cmd_arg()`, which will +For convenience, there is a function in `pydra.mark` called `shell_arg()`, which will takes the above metadata values as arguments and inserts them into the metadata passed to `attrs.field`. This can be especially useful when using an IDE with code-completion. 
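As a minimal sketch of the pattern the docs hunk above describes (adapted from the `ls`-based unit test added earlier in this series; the spec and field names are illustrative rather than part of the documented API), an input spec built with `shell_arg()` might look like::

    import os
    import attrs
    from pydra.engine.specs import ShellSpec
    from pydra.mark import shell_arg

    @attrs.define(kw_only=True, slots=False)
    class LsInputSpec(ShellSpec):
        # each shell_arg() call returns an attrs field whose metadata
        # (help_string, argstr, mandatory, ...) is what ShellCommandTask
        # reads when assembling the command line
        directory: os.PathLike = shell_arg(
            help_string="the directory to list the contents of",
            argstr="",
            mandatory=True,
        )
        hidden: bool = shell_arg(
            help_string="display hidden FS objects",
            argstr="-a",
        )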
diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py index d4338cf621..5fae37d03d 100644 --- a/pydra/mark/__init__.py +++ b/pydra/mark/__init__.py @@ -1,4 +1,4 @@ from .functions import annotate, task -from .shell_commands import cmd_arg, cmd_out +from .shell_commands import shell_arg, shell_out -__all__ = ("annotate", "task", "cmd_arg", "cmd_out") +__all__ = ("annotate", "task", "shell_arg", "shell_out") diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py index 62579ce543..71d4b10f78 100644 --- a/pydra/mark/shell_commands.py +++ b/pydra/mark/shell_commands.py @@ -4,7 +4,7 @@ import attrs -def cmd_arg( +def shell_arg( help_string: str, default: ty.Any = attrs.NOTHING, argstr: str = None, @@ -103,7 +103,7 @@ def cmd_arg( ) -def cmd_out( +def shell_out( help_string: str, mandatory: bool = False, output_file_template: str = None, diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py index e6127edcde..5f2b428024 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell_commands.py @@ -3,36 +3,36 @@ from pathlib import Path import attrs import pydra.engine -from pydra.mark import cmd_arg, cmd_out +from pydra.mark import shell_arg, shell_out def test_shell_cmd(): @attrs.define(kw_only=True, slots=False) class LsInputSpec(pydra.specs.ShellSpec): - directory: os.PathLike = cmd_arg( + directory: os.PathLike = shell_arg( help_string="the directory to list the contents of", argstr="", mandatory=True, ) - hidden: bool = cmd_arg(help_string=("display hidden FS objects"), argstr="-a") - long_format: bool = cmd_arg( + hidden: bool = shell_arg(help_string=("display hidden FS objects"), argstr="-a") + long_format: bool = shell_arg( help_string=( "display properties of FS object, such as permissions, size and timestamps " ), argstr="-l", ) - human_readable: bool = cmd_arg( + human_readable: bool = shell_arg( help_string="display file sizes in human readable form", argstr="-h", requires=["long_format"], ) - complete_date: bool = cmd_arg( + complete_date: bool = shell_arg( help_string="Show complete date in long format", argstr="-T", requires=["long_format"], xor=["date_format_str"], ) - date_format_str: str = cmd_arg( + date_format_str: str = shell_arg( help_string="format string for ", argstr="-D", requires=["long_format"], @@ -44,7 +44,7 @@ def list_outputs(stdout): @attrs.define(kw_only=True, slots=False) class LsOutputSpec(pydra.specs.ShellOutSpec): - entries: list = cmd_out( + entries: list = shell_out( help_string="list of entries returned by ls command", callable=list_outputs ) From 16cecf1a9a675f280c651983f7665e8a0897af9d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 007/342] touched up docs --- docs/input_spec.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 559c2c1f66..2940c17820 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -175,7 +175,7 @@ In the example we used multiple keys in the metadata dictionary including `help_ `shell_arg` Function ------------------- +-------------------- For convenience, there is a function in `pydra.mark` called `shell_arg()`, which will takes the above metadata values as arguments and inserts them into the metadata passed From 6ee5b8621f6f9b4f2ab26181521e8028856e3524 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 008/342] pass through kwargs to attrs.field in shell_arg and shell_out --- 
pydra/mark/shell_commands.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py index 71d4b10f78..b9d29db21b 100644 --- a/pydra/mark/shell_commands.py +++ b/pydra/mark/shell_commands.py @@ -21,6 +21,7 @@ def shell_arg( keep_extension: bool = True, readonly: bool = False, formatter: ty.Callable = None, + **kwargs, ): """ Returns an attrs field with appropriate metadata for it to be added as an argument in @@ -78,6 +79,8 @@ def shell_arg( function can take field (this input field will be passed to the function), inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). + **kwargs + remaining keyword arguments are passed onto the underlying attrs.field function """ metadata = { @@ -99,7 +102,9 @@ def shell_arg( } return attrs.field( - default=default, metadata={k: v for k, v in metadata.items() if v is not None} + default=default, + metadata={k: v for k, v in metadata.items() if v is not None}, + **kwargs, ) @@ -111,6 +116,7 @@ def shell_out( keep_extension: bool = True, requires: list = None, callable: ty.Callable = None, + **kwargs, ): """Returns an attrs field with appropriate metadata for it to be added as an output of a Pydra shell command task definition @@ -142,6 +148,8 @@ def shell_out( to the function), output_dir (task output_dir will be used), stdout, stderr (stdout and stderr of the task will be sent) inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). + **kwargs + remaining keyword arguments are passed onto the underlying attrs.field function """ metadata = { "help_string": help_string, @@ -153,4 +161,6 @@ def shell_out( "callable": callable, } - return attrs.field(metadata={k: v for k, v in metadata.items() if v is not None}) + return attrs.field( + metadata={k: v for k, v in metadata.items() if v is not None}, **kwargs + ) From 47806ff3b68d7245f2efeb630cc1cdf9dd3492f9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 009/342] added shell_task decorator --- pydra/mark/__init__.py | 4 +- pydra/mark/shell_commands.py | 127 ++++++++++++++++++++++++ pydra/mark/tests/test_shell_commands.py | 4 +- 3 files changed, 131 insertions(+), 4 deletions(-) diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py index 5fae37d03d..2ff75d3b0f 100644 --- a/pydra/mark/__init__.py +++ b/pydra/mark/__init__.py @@ -1,4 +1,4 @@ from .functions import annotate, task -from .shell_commands import shell_arg, shell_out +from .shell_commands import shell_task, shell_arg, shell_out -__all__ = ("annotate", "task", "shell_arg", "shell_out") +__all__ = ("annotate", "task", "shell_task", "shell_arg", "shell_out") diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py index b9d29db21b..919b6e1ac2 100644 --- a/pydra/mark/shell_commands.py +++ b/pydra/mark/shell_commands.py @@ -2,6 +2,133 @@ from __future__ import annotations import typing as ty import attrs +import pydra.engine.specs + + +def shell_task( + klass_or_name: ty.Union[type, str], + executable: ty.Optional[str] = None, + input_fields: ty.Optional[dict[str, dict]] = None, + output_fields: ty.Optional[dict[str, dict]] = None, + bases: ty.Optional[list[type]] = None, + input_bases: ty.Optional[list[type]] = None, + output_bases: ty.Optional[list[type]] = None, +) -> type: + """ + Construct an analysis class and validate all the components fit together + + Parameters + ---------- + klass_or_name : type or str + 
Either the class decorated by the @shell_task decorator or the name for a + dynamically generated class + executable : str, optional + If dynamically constructing a class (instead of decorating an existing one) the + name of the executable to run is provided + input_fields : dict[str, dict], optional + If dynamically constructing a class (instead of decorating an existing one) the + input fields can be provided as a dictionary of dictionaries, where the keys + are the name of the fields and the dictionary contents are passed as keyword + args to cmd_arg, with the exception of "type", which is used as the type annotation + of the field. + output_fields : dict[str, dict], optional + If dynamically constructing a class (instead of decorating an existing one) the + output fields can be provided as a dictionary of dictionaries, where the keys + are the name of the fields and the dictionary contents are passed as keyword + args to cmd_out, with the exception of "type", which is used as the type annotation + of the field. + bases : list[type] + Base classes for dynamically constructed shell command classes + input_bases : list[type] + Base classes for the input spec of dynamically constructed shell command classes + output_bases : list[type] + Base classes for the input spec of dynamically constructed shell command classes + + Returns + ------- + type + the shell command task class + """ + + if isinstance(klass_or_name, str): + if None in (executable, input_fields): + raise RuntimeError( + "Dynamically constructed shell tasks require an executable and " + "input_field arguments" + ) + name = klass_or_name + if output_fields is None: + output_fields = {} + if bases is None: + bases = [pydra.engine.task.ShellCommandTask] + if input_bases is None: + input_bases = [pydra.engine.specs.ShellSpec] + if output_bases is None: + output_bases = [pydra.engine.specs.ShellOutSpec] + Inputs = type("Inputs", tuple(input_bases), input_fields) + Outputs = type("Outputs", tuple(output_bases), output_fields) + else: + if ( + executable, + input_fields, + output_fields, + bases, + input_bases, + output_bases, + ) != (None, None, None, None, None, None): + raise RuntimeError( + "When used as a decorator on a class `shell_task` should not be provided " + "executable, input_field or output_field arguments" + ) + klass = klass_or_name + name = klass.__name__ + try: + executable = klass.executable + except KeyError: + raise RuntimeError( + "Classes decorated by `shell_task` should contain an `executable` attribute " + "specifying the shell tool to run" + ) + try: + Inputs = klass.Inputs + except KeyError: + raise RuntimeError( + "Classes decorated by `shell_task` should contain an `Inputs` class attribute " + "specifying the inputs to the shell tool" + ) + if not issubclass(Inputs, pydra.engine.specs.ShellSpec): + Inputs = type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) + try: + Outputs = klass.Outputs + except KeyError: + Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,)) + bases = [klass] + if not issubclass(klass, pydra.engine.task.ShellCommandTask): + bases.append(pydra.engine.task.ShellCommandTask) + + Inputs = attrs.define(kw_only=True, slots=False)(Inputs) + Outputs = attrs.define(kw_only=True, slots=False)(Outputs) + + dct = { + "executable": executable, + "Inputs": Outputs, + "Outputs": Inputs, + "inputs": attrs.field(factory=Inputs), + "outputs": attrs.field(factory=Outputs), + "__annotations__": { + "executable": str, + "inputs": Inputs, + "outputs": Outputs, + }, + } + + return 
attrs.define(kw_only=True, slots=False)( + type( + name, + tuple(bases), + dct, + ) + ) def shell_arg( diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py index 5f2b428024..b3ce3fac83 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell_commands.py @@ -3,10 +3,10 @@ from pathlib import Path import attrs import pydra.engine -from pydra.mark import shell_arg, shell_out +from pydra.mark import shell_task, shell_arg, shell_out -def test_shell_cmd(): +def test_shell_task_full(): @attrs.define(kw_only=True, slots=False) class LsInputSpec(pydra.specs.ShellSpec): directory: os.PathLike = shell_arg( From c0d7011a8637144a6abb629dc50a1109ead4ff70 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 010/342] implemented shell_task and basic unittests. Generated tasks do not work as need to determine best way to map onto input_spec --- pydra/mark/shell_commands.py | 81 ++++++--- pydra/mark/tests/test_shell_commands.py | 222 ++++++++++++++++++------ 2 files changed, 227 insertions(+), 76 deletions(-) diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py index 919b6e1ac2..6849909ac4 100644 --- a/pydra/mark/shell_commands.py +++ b/pydra/mark/shell_commands.py @@ -57,16 +57,45 @@ def shell_task( "input_field arguments" ) name = klass_or_name + if output_fields is None: output_fields = {} - if bases is None: - bases = [pydra.engine.task.ShellCommandTask] - if input_bases is None: - input_bases = [pydra.engine.specs.ShellSpec] - if output_bases is None: - output_bases = [pydra.engine.specs.ShellOutSpec] - Inputs = type("Inputs", tuple(input_bases), input_fields) - Outputs = type("Outputs", tuple(output_bases), output_fields) + + # Ensure bases are lists and can be modified + bases = list(bases) if bases is not None else [] + input_bases = list(input_bases) if input_bases is not None else [] + output_bases = list(output_bases) if output_bases is not None else [] + + # Ensure base classes included somewhere in MRO + def ensure_base_of(base_class: type, bases_list: list[type]): + if not any(issubclass(b, base_class) for b in bases_list): + bases_list.append(base_class) + + ensure_base_of(pydra.engine.task.ShellCommandTask, bases) + ensure_base_of(pydra.engine.specs.ShellSpec, input_bases) + ensure_base_of(pydra.engine.specs.ShellOutSpec, output_bases) + + def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): + annotations = {} + attrs_dict = {"__annotations__": annotations} + for name, dct in fields.items(): + kwargs = dict(dct) # copy to avoid modifying input to outer function + annotations[name] = kwargs.pop("type") + attrs_dict[name] = attrs_func(**kwargs) + return attrs_dict + + Inputs = attrs.define(kw_only=True, slots=False)( + type( + "Inputs", tuple(input_bases), convert_to_attrs(input_fields, shell_arg) + ) + ) + Outputs = attrs.define(kw_only=True, slots=False)( + type( + "Outputs", + tuple(output_bases), + convert_to_attrs(output_fields, shell_out), + ) + ) else: if ( executable, @@ -96,39 +125,43 @@ def shell_task( "Classes decorated by `shell_task` should contain an `Inputs` class attribute " "specifying the inputs to the shell tool" ) - if not issubclass(Inputs, pydra.engine.specs.ShellSpec): - Inputs = type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) + try: Outputs = klass.Outputs except KeyError: Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,)) + + Inputs = attrs.define(kw_only=True, slots=False)(Inputs) + Outputs = 
attrs.define(kw_only=True, slots=False)(Outputs) + + if not issubclass(Inputs, pydra.engine.specs.ShellSpec): + Inputs = attrs.define(kw_only=True, slots=False)( + type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) + ) + + if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): + Outputs = attrs.define(kw_only=True, slots=False)( + type("Outputs", (Outputs, pydra.engine.specs.ShellOutSpec), {}) + ) + bases = [klass] if not issubclass(klass, pydra.engine.task.ShellCommandTask): bases.append(pydra.engine.task.ShellCommandTask) - Inputs = attrs.define(kw_only=True, slots=False)(Inputs) - Outputs = attrs.define(kw_only=True, slots=False)(Outputs) - dct = { "executable": executable, - "Inputs": Outputs, - "Outputs": Inputs, - "inputs": attrs.field(factory=Inputs), - "outputs": attrs.field(factory=Outputs), + "Inputs": Inputs, + "Outputs": Outputs, "__annotations__": { "executable": str, "inputs": Inputs, "outputs": Outputs, + "Inputs": type, + "Outputs": type, }, } - return attrs.define(kw_only=True, slots=False)( - type( - name, - tuple(bases), - dct, - ) - ) + return type(name, tuple(bases), dct) def shell_arg( diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py index b3ce3fac83..018849558e 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell_commands.py @@ -1,69 +1,187 @@ import os import tempfile -from pathlib import Path import attrs -import pydra.engine +from pathlib import Path +import pytest +import cloudpickle as cp from pydra.mark import shell_task, shell_arg, shell_out -def test_shell_task_full(): - @attrs.define(kw_only=True, slots=False) - class LsInputSpec(pydra.specs.ShellSpec): - directory: os.PathLike = shell_arg( - help_string="the directory to list the contents of", - argstr="", - mandatory=True, - ) - hidden: bool = shell_arg(help_string=("display hidden FS objects"), argstr="-a") - long_format: bool = shell_arg( - help_string=( - "display properties of FS object, such as permissions, size and timestamps " - ), - argstr="-l", - ) - human_readable: bool = shell_arg( - help_string="display file sizes in human readable form", - argstr="-h", - requires=["long_format"], - ) - complete_date: bool = shell_arg( - help_string="Show complete date in long format", - argstr="-T", - requires=["long_format"], - xor=["date_format_str"], - ) - date_format_str: str = shell_arg( - help_string="format string for ", - argstr="-D", - requires=["long_format"], - xor=["complete_date"], - ) +def list_entries(stdout): + return stdout.split("\n")[:-1] - def list_outputs(stdout): - return stdout.split("\n")[:-1] - @attrs.define(kw_only=True, slots=False) - class LsOutputSpec(pydra.specs.ShellOutSpec): - entries: list = shell_out( - help_string="list of entries returned by ls command", callable=list_outputs - ) +@pytest.fixture +def tmpdir(): + return Path(tempfile.mkdtemp()) - class Ls(pydra.engine.ShellCommandTask): - """Task definition for the `ls` command line tool""" - executable = "ls" +@pytest.fixture(params=["static", "dynamic"]) +def Ls(request): + if request.param == "static": - input_spec = pydra.specs.SpecInfo( - name="LsInput", - bases=(LsInputSpec,), - ) + @shell_task + class Ls: + executable = "ls" - output_spec = pydra.specs.SpecInfo( - name="LsOutput", - bases=(LsOutputSpec,), + class Inputs: + directory: os.PathLike = shell_arg( + help_string="the directory to list the contents of", + argstr="", + mandatory=True, + ) + hidden: bool = shell_arg( + help_string=("display hidden FS objects"), + 
argstr="-a", + default=False, + ) + long_format: bool = shell_arg( + help_string=( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + default=False, + argstr="-l", + ) + human_readable: bool = shell_arg( + help_string="display file sizes in human readable form", + argstr="-h", + default=False, + requires=["long_format"], + ) + complete_date: bool = shell_arg( + help_string="Show complete date in long format", + argstr="-T", + default=False, + requires=["long_format"], + xor=["date_format_str"], + ) + date_format_str: str = shell_arg( + help_string="format string for ", + argstr="-D", + default=None, + requires=["long_format"], + xor=["complete_date"], + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + elif request.param == "dynamic": + Ls = shell_task( + "Ls", + executable="ls", + input_fields={ + "directory": { + "type": os.PathLike, + "help_string": "the directory to list the contents of", + "argstr": "", + "mandatory": True, + }, + "hidden": { + "type": bool, + "help_string": "display hidden FS objects", + "argstr": "-a", + }, + "long_format": { + "type": bool, + "help_string": ( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + "argstr": "-l", + }, + "human_readable": { + "type": bool, + "help_string": "display file sizes in human readable form", + "argstr": "-h", + "requires": ["long_format"], + }, + "complete_date": { + "type": bool, + "help_string": "Show complete date in long format", + "argstr": "-T", + "requires": ["long_format"], + "xor": ["date_format_str"], + }, + "date_format_str": { + "type": str, + "help_string": "format string for ", + "argstr": "-D", + "requires": ["long_format"], + "xor": ["complete_date"], + }, + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } + }, ) - tmpdir = Path(tempfile.mkdtemp()) + else: + assert False + + return Ls + + +def test_shell_task_fields(Ls): + assert [a.name for a in attrs.fields(Ls.Inputs)] == [ + "executable", + "args", + "directory", + "hidden", + "long_format", + "human_readable", + "complete_date", + "date_format_str", + ] + + assert [a.name for a in attrs.fields(Ls.Outputs)] == [ + "return_code", + "stdout", + "stderr", + "entries", + ] + + +def test_shell_task_pickle_roundtrip(Ls, tmpdir): + pkl_file = tmpdir / "ls.pkl" + with open(pkl_file, "wb") as f: + cp.dump(Ls, f) + + with open(pkl_file, "rb") as f: + RereadLs = cp.load(f) + + assert RereadLs is Ls + + +@pytest.mark.xfail( + reason=( + "Need to change relationship between Inputs/Outputs and input_spec/output_spec " + "for the task to run" + ) +) +def test_shell_task_init(Ls, tmpdir): + inputs = Ls.Inputs(directory=tmpdir) + assert inputs.directory == tmpdir + assert not inputs.hidden + outputs = Ls.Outputs(entries=[]) + assert outputs.entries == [] + + +@pytest.mark.xfail( + reason=( + "Need to change relationship between Inputs/Outputs and input_spec/output_spec " + "for the task to run" + ) +) +def test_shell_task_run(Ls, tmpdir): Path.touch(tmpdir / "a") Path.touch(tmpdir / "b") Path.touch(tmpdir / "c") From 0da3b4657e5631fdc0de8c95ec314191a00da8de Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 011/342] shell command unittests pass --- pydra/mark/shell_commands.py | 185 +++++++++++++++++------- pydra/mark/tests/test_shell_commands.py | 134 ++++++++++++++--- 2 
files changed, 242 insertions(+), 77 deletions(-) diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell_commands.py index 6849909ac4..9ce8b9364d 100644 --- a/pydra/mark/shell_commands.py +++ b/pydra/mark/shell_commands.py @@ -2,6 +2,8 @@ from __future__ import annotations import typing as ty import attrs + +# import os import pydra.engine.specs @@ -11,8 +13,8 @@ def shell_task( input_fields: ty.Optional[dict[str, dict]] = None, output_fields: ty.Optional[dict[str, dict]] = None, bases: ty.Optional[list[type]] = None, - input_bases: ty.Optional[list[type]] = None, - output_bases: ty.Optional[list[type]] = None, + inputs_bases: ty.Optional[list[type]] = None, + outputs_bases: ty.Optional[list[type]] = None, ) -> type: """ Construct an analysis class and validate all the components fit together @@ -39,9 +41,9 @@ def shell_task( of the field. bases : list[type] Base classes for dynamically constructed shell command classes - input_bases : list[type] + inputs_bases : list[type] Base classes for the input spec of dynamically constructed shell command classes - output_bases : list[type] + outputs_bases : list[type] Base classes for the input spec of dynamically constructed shell command classes Returns @@ -50,30 +52,35 @@ def shell_task( the shell command task class """ + annotations = { + "executable": str, + "Inputs": type, + "Outputs": type, + } + dct = {"__annotations__": annotations} + if isinstance(klass_or_name, str): - if None in (executable, input_fields): - raise RuntimeError( - "Dynamically constructed shell tasks require an executable and " - "input_field arguments" - ) name = klass_or_name + if executable is not None: + dct["executable"] = executable + if input_fields is None: + input_fields = {} if output_fields is None: output_fields = {} - - # Ensure bases are lists and can be modified bases = list(bases) if bases is not None else [] - input_bases = list(input_bases) if input_bases is not None else [] - output_bases = list(output_bases) if output_bases is not None else [] + inputs_bases = list(inputs_bases) if inputs_bases is not None else [] + outputs_bases = list(outputs_bases) if outputs_bases is not None else [] # Ensure base classes included somewhere in MRO - def ensure_base_of(base_class: type, bases_list: list[type]): + def ensure_base_included(base_class: type, bases_list: list[type]): if not any(issubclass(b, base_class) for b in bases_list): bases_list.append(base_class) - ensure_base_of(pydra.engine.task.ShellCommandTask, bases) - ensure_base_of(pydra.engine.specs.ShellSpec, input_bases) - ensure_base_of(pydra.engine.specs.ShellOutSpec, output_bases) + # Ensure bases are lists and can be modified + ensure_base_included(pydra.engine.task.ShellCommandTask, bases) + ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) + ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): annotations = {} @@ -86,82 +93,108 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): Inputs = attrs.define(kw_only=True, slots=False)( type( - "Inputs", tuple(input_bases), convert_to_attrs(input_fields, shell_arg) + "Inputs", + tuple(inputs_bases), + convert_to_attrs(input_fields, shell_arg), ) ) + Outputs = attrs.define(kw_only=True, slots=False)( type( "Outputs", - tuple(output_bases), + tuple(outputs_bases), convert_to_attrs(output_fields, shell_out), ) ) + else: if ( executable, input_fields, output_fields, bases, - input_bases, - output_bases, + inputs_bases, + 
outputs_bases, ) != (None, None, None, None, None, None): raise RuntimeError( - "When used as a decorator on a class `shell_task` should not be provided " - "executable, input_field or output_field arguments" + "When used as a decorator on a class, `shell_task` should not be " + "provided any other arguments" ) klass = klass_or_name name = klass.__name__ + + bases = [klass] + if not issubclass(klass, pydra.engine.task.ShellCommandTask): + bases.append(pydra.engine.task.ShellCommandTask) + try: executable = klass.executable - except KeyError: + except AttributeError: raise RuntimeError( - "Classes decorated by `shell_task` should contain an `executable` attribute " - "specifying the shell tool to run" + "Classes decorated by `shell_task` should contain an `executable` " + "attribute specifying the shell tool to run" ) try: Inputs = klass.Inputs except KeyError: - raise RuntimeError( - "Classes decorated by `shell_task` should contain an `Inputs` class attribute " - "specifying the inputs to the shell tool" + raise AttributeError( + "Classes decorated by `shell_task` should contain an `Inputs` class " + "attribute specifying the inputs to the shell tool" ) try: Outputs = klass.Outputs - except KeyError: - Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,)) + except AttributeError: + Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) Inputs = attrs.define(kw_only=True, slots=False)(Inputs) Outputs = attrs.define(kw_only=True, slots=False)(Outputs) - if not issubclass(Inputs, pydra.engine.specs.ShellSpec): - Inputs = attrs.define(kw_only=True, slots=False)( - type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) - ) + if not issubclass(Inputs, pydra.engine.specs.ShellSpec): + Inputs = attrs.define(kw_only=True, slots=False)( + type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) + ) - if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): - Outputs = attrs.define(kw_only=True, slots=False)( - type("Outputs", (Outputs, pydra.engine.specs.ShellOutSpec), {}) - ) + template_fields = _gen_output_template_fields(Inputs, Outputs) - bases = [klass] - if not issubclass(klass, pydra.engine.task.ShellCommandTask): - bases.append(pydra.engine.task.ShellCommandTask) + if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): + outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) + wrap_output = True + else: + outputs_bases = (Outputs,) + wrap_output = False - dct = { - "executable": executable, - "Inputs": Inputs, - "Outputs": Outputs, - "__annotations__": { - "executable": str, - "inputs": Inputs, - "outputs": Outputs, - "Inputs": type, - "Outputs": type, - }, - } + if wrap_output or template_fields: + Outputs = attrs.define(kw_only=True, slots=False)( + type("Outputs", outputs_bases, template_fields) + ) - return type(name, tuple(bases), dct) + dct["Inputs"] = Inputs + dct["Outputs"] = Outputs + + task_klass = type(name, tuple(bases), dct) + task_klass.input_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) + ) + task_klass.output_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) + ) + if not hasattr(task_klass, "executable"): + raise RuntimeError( + "Classes generated by `shell_task` should contain an `executable` " + "attribute specifying the shell tool to run" + ) + if not hasattr(task_klass, "Inputs"): + raise RuntimeError( + "Classes generated by `shell_task` should contain an `Inputs` class " + "attribute specifying the inputs to the shell tool" + ) + 
if not hasattr(task_klass, "Outputs"): + raise RuntimeError( + "Classes generated by `shell_task` should contain an `Outputs` class " + "attribute specifying the outputs to the shell tool" + ) + return task_klass def shell_arg( @@ -324,3 +357,45 @@ def shell_out( return attrs.field( metadata={k: v for k, v in metadata.items() if v is not None}, **kwargs ) + + +def _gen_output_template_fields(Inputs: type, Outputs: type) -> tuple[dict, dict]: + """Auto-generates output fields for inputs that specify an 'output_file_template' + + Parameters + ---------- + Inputs : type + Input specification class + Outputs : type + Output specification class + + Returns + ------- + template_fields: dict[str, attrs._CountingAttribute] + the template fields to add to the output spec + + Raises + ------ + RuntimeError + _description_ + """ + annotations = {} + template_fields = {"__annotations__": annotations} + output_field_names = [f.name for f in attrs.fields(Outputs)] + for fld in attrs.fields(Inputs): + if "output_file_template" in fld.metadata: + if "output_field_name" in fld.metadata: + field_name = fld.metadata["output_field_name"] + else: + field_name = fld.name + # skip adding if the field already in the output_spec + exists_already = field_name in output_field_names + if not exists_already: + metadata = { + "help_string": fld.metadata["help_string"], + "mandatory": fld.metadata["mandatory"], + "keep_extension": fld.metadata["keep_extension"], + } + template_fields[field_name] = attrs.field(metadata=metadata) + annotations[field_name] = str + return template_fields diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell_commands.py index 018849558e..fc2edd8291 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell_commands.py @@ -29,6 +29,7 @@ class Inputs: help_string="the directory to list the contents of", argstr="", mandatory=True, + position=-1, ) hidden: bool = shell_arg( help_string=("display hidden FS objects"), @@ -59,7 +60,7 @@ class Inputs: date_format_str: str = shell_arg( help_string="format string for ", argstr="-D", - default=None, + default=attrs.NOTHING, requires=["long_format"], xor=["complete_date"], ) @@ -80,6 +81,7 @@ class Outputs: "help_string": "the directory to list the contents of", "argstr": "", "mandatory": True, + "position": -1, }, "hidden": { "type": bool, @@ -161,33 +163,121 @@ def test_shell_task_pickle_roundtrip(Ls, tmpdir): assert RereadLs is Ls -@pytest.mark.xfail( - reason=( - "Need to change relationship between Inputs/Outputs and input_spec/output_spec " - "for the task to run" - ) -) -def test_shell_task_init(Ls, tmpdir): - inputs = Ls.Inputs(directory=tmpdir) - assert inputs.directory == tmpdir - assert not inputs.hidden - outputs = Ls.Outputs(entries=[]) - assert outputs.entries == [] - - -@pytest.mark.xfail( - reason=( - "Need to change relationship between Inputs/Outputs and input_spec/output_spec " - "for the task to run" - ) -) def test_shell_task_run(Ls, tmpdir): Path.touch(tmpdir / "a") Path.touch(tmpdir / "b") Path.touch(tmpdir / "c") - ls = Ls(directory=tmpdir) + ls = Ls(directory=tmpdir, long_format=True) + # Test cmdline + assert ls.inputs.directory == tmpdir + assert not ls.inputs.hidden + assert ls.inputs.long_format + assert ls.cmdline == f"ls -l {tmpdir}" + + # Drop Long format flag to make output simpler + ls = Ls(directory=tmpdir) result = ls() assert result.output.entries == ["a", "b", "c"] + + +@pytest.fixture(params=["static", "dynamic"]) +def A(request): + if request.param == 
"static": + + @shell_task + class A: + executable = "cp" + + class Inputs: + x: os.PathLike = shell_arg( + help_string="an input file", argstr="", position=0 + ) + y: str = shell_arg( + help_string="an input file", + output_file_template="{x}_out", + argstr="", + ) + + elif request.param == "dynamic": + A = shell_task( + "A", + executable="cp", + input_fields={ + "x": { + "type": os.PathLike, + "help_string": "an input file", + "argstr": "", + "position": 0, + }, + "y": { + "type": str, + "help_string": "an output file", + "argstr": "", + "output_file_template": "{x}_out", + }, + }, + ) + else: + assert False + + return A + + +def get_file_size(y: Path): + result = os.stat(y) + return result.st_size + + +def test_shell_task_bases_dynamic(A, tmpdir): + B = shell_task( + "B", + output_fields={ + "out_file_size": { + "type": int, + "help_string": "size of the output directory", + "callable": get_file_size, + } + }, + bases=[A], + inputs_bases=[A.Inputs], + ) + + xpath = tmpdir / "x.txt" + ypath = tmpdir / "y.txt" + Path.touch(xpath) + + b = B(x=xpath, y=str(ypath)) + + result = b() + + assert b.inputs.x == xpath + assert result.output.y == str(ypath) + + +def test_shell_task_dynamic_inputs_bases(tmpdir): + A = shell_task( + "A", + "ls", + input_fields={ + "directory": {"type": os.PathLike, "help_string": "input directory"} + }, + ) + B = shell_task( + "B", + "ls", + input_fields={ + "hidden": { + "type": bool, + "help_string": "show hidden files", + "default": False, + } + }, + inputs_bases=[A.Inputs], + ) + + b = B(directory=tmpdir) + + assert b.inputs.directory == tmpdir From c4d1875cf8afed9c7ebf1a798af7981126462f85 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 012/342] renamed pydra.mark.shell_commands to pydra.mark.shell --- pydra/mark/__init__.py | 2 +- pydra/mark/{shell_commands.py => shell.py} | 0 .../{test_shell_commands.py => test_shell.py} | 21 ++++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) rename pydra/mark/{shell_commands.py => shell.py} (100%) rename pydra/mark/tests/{test_shell_commands.py => test_shell.py} (94%) diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py index 2ff75d3b0f..f2434e5a1c 100644 --- a/pydra/mark/__init__.py +++ b/pydra/mark/__init__.py @@ -1,4 +1,4 @@ from .functions import annotate, task -from .shell_commands import shell_task, shell_arg, shell_out +from .shell import shell_task, shell_arg, shell_out __all__ = ("annotate", "task", "shell_task", "shell_arg", "shell_out") diff --git a/pydra/mark/shell_commands.py b/pydra/mark/shell.py similarity index 100% rename from pydra/mark/shell_commands.py rename to pydra/mark/shell.py diff --git a/pydra/mark/tests/test_shell_commands.py b/pydra/mark/tests/test_shell.py similarity index 94% rename from pydra/mark/tests/test_shell_commands.py rename to pydra/mark/tests/test_shell.py index fc2edd8291..c13816e157 100644 --- a/pydra/mark/tests/test_shell_commands.py +++ b/pydra/mark/tests/test_shell.py @@ -242,7 +242,6 @@ def test_shell_task_bases_dynamic(A, tmpdir): } }, bases=[A], - inputs_bases=[A.Inputs], ) xpath = tmpdir / "x.txt" @@ -257,6 +256,26 @@ def test_shell_task_bases_dynamic(A, tmpdir): assert result.output.y == str(ypath) +def test_shell_task_bases_static(A, tmpdir): + @shell_task + class B(A): + class Outputs: + out_file_size: int = shell_out( + help_string="size of the output directory", callable=get_file_size + ) + + xpath = tmpdir / "x.txt" + ypath = tmpdir / "y.txt" + Path.touch(xpath) + + b = B(x=xpath, y=str(ypath)) + + 
result = b() + + assert b.inputs.x == xpath + assert result.output.y == str(ypath) + + def test_shell_task_dynamic_inputs_bases(tmpdir): A = shell_task( "A", From d25eab0a2ba90878a65e2e4a560ed489f7b25fc6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 013/342] fixed up inheritance of Inputs and Outputs --- pydra/mark/shell.py | 44 ++++++++++++++----- pydra/mark/tests/test_shell.py | 77 +++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 21 deletions(-) diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py index 9ce8b9364d..d4e92eb115 100644 --- a/pydra/mark/shell.py +++ b/pydra/mark/shell.py @@ -60,6 +60,7 @@ def shell_task( dct = {"__annotations__": annotations} if isinstance(klass_or_name, str): + # Dynamically created classes using shell_task as a function name = klass_or_name if executable is not None: @@ -77,6 +78,19 @@ def ensure_base_included(base_class: type, bases_list: list[type]): if not any(issubclass(b, base_class) for b in bases_list): bases_list.append(base_class) + # Get inputs and outputs bases from base class if not explicitly provided + for base in bases: + if not inputs_bases: + try: + inputs_bases = [base.Inputs] + except AttributeError: + pass + if not outputs_bases: + try: + outputs_bases = [base.Outputs] + except AttributeError: + pass + # Ensure bases are lists and can be modified ensure_base_included(pydra.engine.task.ShellCommandTask, bases) ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) @@ -108,6 +122,7 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): ) else: + # Statically defined classes using shell_task as decorator if ( executable, input_fields, @@ -147,8 +162,12 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): except AttributeError: Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) - Inputs = attrs.define(kw_only=True, slots=False)(Inputs) - Outputs = attrs.define(kw_only=True, slots=False)(Outputs) + # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. 
+ # not in a base class) + if "Inputs" in klass.__dict__: + Inputs = attrs.define(kw_only=True, slots=False)(Inputs) + if "Outputs" in klass.__dict__: + Outputs = attrs.define(kw_only=True, slots=False)(Outputs) if not issubclass(Inputs, pydra.engine.specs.ShellSpec): Inputs = attrs.define(kw_only=True, slots=False)( @@ -159,12 +178,12 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) - wrap_output = True + add_base_class = True else: outputs_bases = (Outputs,) - wrap_output = False + add_base_class = False - if wrap_output or template_fields: + if add_base_class or template_fields: Outputs = attrs.define(kw_only=True, slots=False)( type("Outputs", outputs_bases, template_fields) ) @@ -173,12 +192,7 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): dct["Outputs"] = Outputs task_klass = type(name, tuple(bases), dct) - task_klass.input_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) - ) - task_klass.output_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) - ) + if not hasattr(task_klass, "executable"): raise RuntimeError( "Classes generated by `shell_task` should contain an `executable` " @@ -194,6 +208,14 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): "Classes generated by `shell_task` should contain an `Outputs` class " "attribute specifying the outputs to the shell tool" ) + + task_klass.input_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) + ) + task_klass.output_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) + ) + return task_klass diff --git a/pydra/mark/tests/test_shell.py b/pydra/mark/tests/test_shell.py index c13816e157..b456aa648f 100644 --- a/pydra/mark/tests/test_shell.py +++ b/pydra/mark/tests/test_shell.py @@ -132,7 +132,7 @@ class Outputs: return Ls -def test_shell_task_fields(Ls): +def test_shell_fields(Ls): assert [a.name for a in attrs.fields(Ls.Inputs)] == [ "executable", "args", @@ -152,7 +152,7 @@ def test_shell_task_fields(Ls): ] -def test_shell_task_pickle_roundtrip(Ls, tmpdir): +def test_shell_pickle_roundtrip(Ls, tmpdir): pkl_file = tmpdir / "ls.pkl" with open(pkl_file, "wb") as f: cp.dump(Ls, f) @@ -163,7 +163,7 @@ def test_shell_task_pickle_roundtrip(Ls, tmpdir): assert RereadLs is Ls -def test_shell_task_run(Ls, tmpdir): +def test_shell_run(Ls, tmpdir): Path.touch(tmpdir / "a") Path.touch(tmpdir / "b") Path.touch(tmpdir / "c") @@ -196,7 +196,7 @@ class Inputs: help_string="an input file", argstr="", position=0 ) y: str = shell_arg( - help_string="an input file", + help_string="path of output file", output_file_template="{x}_out", argstr="", ) @@ -214,7 +214,7 @@ class Inputs: }, "y": { "type": str, - "help_string": "an output file", + "help_string": "path of output file", "argstr": "", "output_file_template": "{x}_out", }, @@ -231,7 +231,7 @@ def get_file_size(y: Path): return result.st_size -def test_shell_task_bases_dynamic(A, tmpdir): +def test_shell_bases_dynamic(A, tmpdir): B = shell_task( "B", output_fields={ @@ -256,7 +256,7 @@ def test_shell_task_bases_dynamic(A, tmpdir): assert result.output.y == str(ypath) -def test_shell_task_bases_static(A, tmpdir): +def test_shell_bases_static(A, tmpdir): @shell_task class B(A): class Outputs: @@ -276,12 +276,24 @@ class 
Outputs: assert result.output.y == str(ypath) -def test_shell_task_dynamic_inputs_bases(tmpdir): +def test_shell_inputs_outputs_bases_dynamic(tmpdir): A = shell_task( "A", "ls", input_fields={ - "directory": {"type": os.PathLike, "help_string": "input directory"} + "directory": { + "type": os.PathLike, + "help_string": "input directory", + "argstr": "", + "position": -1, + } + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } }, ) B = shell_task( @@ -290,13 +302,58 @@ def test_shell_task_dynamic_inputs_bases(tmpdir): input_fields={ "hidden": { "type": bool, + "argstr": "-a", "help_string": "show hidden files", "default": False, } }, + bases=[A], inputs_bases=[A.Inputs], ) - b = B(directory=tmpdir) + Path.touch(tmpdir / ".hidden") + + b = B(directory=tmpdir, hidden=True) + + assert b.inputs.directory == tmpdir + assert b.inputs.hidden + assert b.cmdline == f"ls -a {tmpdir}" + + result = b() + assert result.output.entries == [".", "..", ".hidden"] + + +def test_shell_inputs_outputs_bases_static(tmpdir): + @shell_task + class A: + executable = "ls" + + class Inputs: + directory: os.PathLike = shell_arg( + help_string="input directory", argstr="", position=-1 + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + @shell_task + class B(A): + class Inputs(A.Inputs): + hidden: bool = shell_arg( + help_string="show hidden files", + argstr="-a", + default=False, + ) + + Path.touch(tmpdir / ".hidden") + + b = B(directory=tmpdir, hidden=True) assert b.inputs.directory == tmpdir + assert b.inputs.hidden + + result = b() + assert result.output.entries == [".", "..", ".hidden"] From e4095a77a22b16800815f65e1256acb3fd099fd2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 014/342] added tests for output_file_template/output_field_name and various uncovered cases in shell_task decorator --- pydra/mark/shell.py | 27 ++------- pydra/mark/tests/test_shell.py | 108 +++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 21 deletions(-) diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py index d4e92eb115..9abdcf61fe 100644 --- a/pydra/mark/shell.py +++ b/pydra/mark/shell.py @@ -151,8 +151,8 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): ) try: Inputs = klass.Inputs - except KeyError: - raise AttributeError( + except AttributeError: + raise RuntimeError( "Classes decorated by `shell_task` should contain an `Inputs` class " "attribute specifying the inputs to the shell tool" ) @@ -198,16 +198,6 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): "Classes generated by `shell_task` should contain an `executable` " "attribute specifying the shell tool to run" ) - if not hasattr(task_klass, "Inputs"): - raise RuntimeError( - "Classes generated by `shell_task` should contain an `Inputs` class " - "attribute specifying the inputs to the shell tool" - ) - if not hasattr(task_klass, "Outputs"): - raise RuntimeError( - "Classes generated by `shell_task` should contain an `Outputs` class " - "attribute specifying the outputs to the shell tool" - ) task_klass.input_spec = pydra.engine.specs.SpecInfo( name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) @@ -381,25 +371,20 @@ def shell_out( ) -def _gen_output_template_fields(Inputs: type, Outputs: type) -> tuple[dict, dict]: +def _gen_output_template_fields(Inputs: type, Outputs: 
type) -> dict: """Auto-generates output fields for inputs that specify an 'output_file_template' Parameters ---------- Inputs : type - Input specification class + Inputs specification class Outputs : type - Output specification class + Outputs specification class Returns ------- - template_fields: dict[str, attrs._CountingAttribute] + template_fields: dict[str, attrs._make_CountingAttribute] the template fields to add to the output spec - - Raises - ------ - RuntimeError - _description_ """ annotations = {} template_fields = {"__annotations__": annotations} diff --git a/pydra/mark/tests/test_shell.py b/pydra/mark/tests/test_shell.py index b456aa648f..6fee7259b1 100644 --- a/pydra/mark/tests/test_shell.py +++ b/pydra/mark/tests/test_shell.py @@ -226,6 +226,53 @@ class Inputs: return A +def test_shell_output_file_template(A): + assert "y" in [a.name for a in attrs.fields(A.Outputs)] + + +def test_shell_output_field_name_static(): + @shell_task + class A: + executable = "cp" + + class Inputs: + x: os.PathLike = shell_arg( + help_string="an input file", argstr="", position=0 + ) + y: str = shell_arg( + help_string="path of output file", + output_file_template="{x}_out", + output_field_name="y_out", + argstr="", + ) + + assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + + +def test_shell_output_field_name_dynamic(): + A = shell_task( + "A", + executable="cp", + input_fields={ + "x": { + "type": os.PathLike, + "help_string": "an input file", + "argstr": "", + "position": 0, + }, + "y": { + "type": str, + "help_string": "path of output file", + "argstr": "", + "output_field_name": "y_out", + "output_file_template": "{x}_out", + }, + }, + ) + + assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + + def get_file_size(y: Path): result = os.stat(y) return result.st_size @@ -357,3 +404,64 @@ class Inputs(A.Inputs): result = b() assert result.output.entries == [".", "..", ".hidden"] + + +def test_shell_missing_executable_static(): + with pytest.raises(RuntimeError, match="should contain an `executable`"): + + @shell_task + class A: + class Inputs: + directory: os.PathLike = shell_arg( + help_string="input directory", argstr="", position=-1 + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + +def test_shell_missing_executable_dynamic(): + with pytest.raises(RuntimeError, match="should contain an `executable`"): + A = shell_task( + "A", + executable=None, + input_fields={ + "directory": { + "type": os.PathLike, + "help_string": "input directory", + "argstr": "", + "position": -1, + } + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } + }, + ) + + +def test_shell_missing_inputs_static(): + with pytest.raises(RuntimeError, match="should contain an `Inputs`"): + + @shell_task + class A: + executable = "ls" + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + +def test_shell_decorator_misuse(A): + with pytest.raises( + RuntimeError, match=("`shell_task` should not be provided any other arguments") + ): + shell_task(A, executable="cp") From e9a58bc61eb7f159a23cf074979cdc61883ea370 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 24 Jul 2024 07:57:34 +1000 Subject: [PATCH 015/342] converted to hatch build system and made proper namespace package --- pydra/__init__.py | 49 -------------------------------- 
pydra/design/__init__.py | 0 pyproject.toml | 61 ++++++++++++++++------------------------ 3 files changed, 24 insertions(+), 86 deletions(-) delete mode 100644 pydra/__init__.py create mode 100644 pydra/design/__init__.py diff --git a/pydra/__init__.py b/pydra/__init__.py deleted file mode 100644 index f704d670a5..0000000000 --- a/pydra/__init__.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -The Pydra workflow engine. - -Pydra is a rewrite of the Nipype engine with mapping and joining as -first-class operations. It forms the core of the Nipype 2.0 ecosystem. - -""" - -# This call enables pydra.tasks to be used as a namespace package when installed -# in editable mode. In normal installations it has no effect. -__path__ = __import__("pkgutil").extend_path(__path__, __name__) - -import logging - -import __main__ -import attr - -from . import mark -from .engine import AuditFlag, ShellCommandTask, Submitter, Workflow, specs - -__all__ = ( - "Submitter", - "Workflow", - "AuditFlag", - "ShellCommandTask", - "specs", - "mark", -) - -try: - from ._version import __version__ -except ImportError: - pass - -logger = logging.getLogger("pydra") - - -def check_latest_version(): - import etelemetry - - return etelemetry.check_available_version("nipype/pydra", __version__, lgr=logger) - - -# Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL -if not hasattr(__main__, "__file__"): - from .engine.core import TaskBase - - if TaskBase._etelemetry_version_data is None: - TaskBase._etelemetry_version_data = check_latest_version() diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pyproject.toml b/pyproject.toml index ba862339cd..150e6b8dba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] -requires = ["flit_scm"] -build-backend = "flit_scm:buildapi" +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" [project] name = "pydra" @@ -18,19 +18,12 @@ dependencies = [ "typing_extensions >=4.6.3; python_version < '3.10'", "typing_utils >=0.1.0; python_version < '3.10'", ] -license = {file = "LICENSE"} -authors = [ - {name = "Nipype developers", email = "neuroimaging@python.org"}, -] +license = { file = "LICENSE" } +authors = [{ name = "Nipype developers", email = "neuroimaging@python.org" }] maintainers = [ - {name = "Nipype developers", email = "neuroimaging@python.org"}, -] -keywords = [ - "brainweb", - "dataflow", - "neuroimaging", - "pydra", + { name = "Nipype developers", email = "neuroimaging@python.org" }, ] +keywords = ["brainweb", "dataflow", "neuroimaging", "pydra"] classifiers = [ "Development Status :: 3 - Alpha", "Environment :: Console", @@ -39,28 +32,19 @@ classifiers = [ "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", ] dynamic = ["version"] [project.optional-dependencies] -psij = [ - "psij-python", -] -dask = [ - "dask", - "distributed", -] -dev = [ - "black", - "pre-commit", - "pydra[test]", -] +psij = ["psij-python"] +dask = ["dask", "distributed"] +dev = ["black", "pre-commit", "pydra[test]"] doc = [ "packaging", "sphinx ==6.2.1", @@ -84,9 +68,7 @@ test = [ "boutiques", 
"pympler", ] -jupyter = [ - "nest_asyncio" -] +jupyter = ["nest_asyncio"] # Aliases tests = ["pydra[test]"] docs = ["pydra[doc]"] @@ -97,18 +79,23 @@ documentation = "https://nipype.github.io/pydra/" homepage = "https://nipype.github.io/pydra/" repository = "https://github.com/nipype/pydra.git" -[tool.flit.module] -name = "pydra" +[tool.hatch.build] +packages = ["pydra"] +exclude = ["tests"] +include = ["./pydra"] + +[tool.hatch.version] +source = "vcs" -[tool.flit.sdist] -exclude = [".gitignore"] +[tool.hatch.build.hooks.vcs] +version-file = "pydra/engine/_version.py" -[tool.setuptools_scm] -write_to = "pydra/_version.py" +[tool.hatch.metadata] +allow-direct-references = true [tool.black] -target-version = ['py37', 'py38'] -exclude = "pydra/_version.py" +target-version = ['py38'] +exclude = "pydra/engine/_version.py" [tool.codespell] ignore-words-list = "nd,afile,inpt" From e8bce868879188ad20cea84a64754b7e0a6ce97a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 2 Aug 2024 08:15:21 +1000 Subject: [PATCH 016/342] initial structure of 'design' sub-package --- .gitignore | 1 + pydra/design/python.py | 15 + pydra/design/shell.py | 424 ++++++++++++++++++++++++++++ pydra/design/tests/test_shell.py | 467 +++++++++++++++++++++++++++++++ 4 files changed, 907 insertions(+) create mode 100644 pydra/design/python.py create mode 100644 pydra/design/shell.py create mode 100644 pydra/design/tests/test_shell.py diff --git a/.gitignore b/.gitignore index da16b937b9..fe1935c4bb 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ cov.xml # This can be generated in-tree. We never want to commit it. pydra/_version.py +pydra/engine/_version.py diff --git a/pydra/design/python.py b/pydra/design/python.py new file mode 100644 index 0000000000..ed746fe01b --- /dev/null +++ b/pydra/design/python.py @@ -0,0 +1,15 @@ +import typing as ty +import attrs + + +@attrs.define(kw_only=True) +class arg: + help_string: str + default: ty.Any = attrs.NOTHING + mandatory: bool = False + allowed_values: list = None + requires: list = None + xor: list = None + copyfile: bool = None + keep_extension: bool = True + readonly: bool = False diff --git a/pydra/design/shell.py b/pydra/design/shell.py new file mode 100644 index 0000000000..c26fa215e3 --- /dev/null +++ b/pydra/design/shell.py @@ -0,0 +1,424 @@ +"""Decorators and helper functions to create ShellCommandTasks used in Pydra workflows""" + +from __future__ import annotations +import typing as ty +import attrs +from .python import arg as python_arg + +# import os +import pydra.engine.specs + + +@attrs.define(kw_only=True) +class arg(python_arg): + argstr: str = None + position: int = None + sep: str = None + allowed_values: list = None + container_path: bool = False + output_file_template: str = None + output_field_name: str = None + keep_extension: bool = True + readonly: bool = False + formatter: ty.Callable = None + + +# def arg( +# help_string: str, +# default: ty.Any = attrs.NOTHING, +# argstr: str = None, +# position: int = None, +# mandatory: bool = False, +# sep: str = None, +# allowed_values: list = None, +# requires: list = None, +# xor: list = None, +# copyfile: bool = None, +# container_path: bool = False, +# output_file_template: str = None, +# output_field_name: str = None, +# keep_extension: bool = True, +# readonly: bool = False, +# formatter: ty.Callable = None, +# **kwargs, +# ): +# """ +# Returns an attrs field with appropriate metadata for it to be added as an argument in +# a Pydra shell command task definition + +# Parameters +# ------------ +# 
help_string: str +# A short description of the input field. +# default : Any, optional +# the default value for the argument +# argstr: str, optional +# A flag or string that is used in the command before the value, e.g. -v or +# -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, +# the flag is used before every element if a list is provided as a value. If no +# argstr is used the field is not part of the command. +# position: int, optional +# Position of the field in the command, could be nonnegative or negative integer. +# If nothing is provided the field will be inserted between all fields with +# nonnegative positions and fields with negative positions. +# mandatory: bool, optional +# If True user has to provide a value for the field, by default it is False +# sep: str, optional +# A separator if a list is provided as a value. +# allowed_values: list, optional +# List of allowed values for the field. +# requires: list, optional +# List of field names that are required together with the field. +# xor: list, optional +# List of field names that are mutually exclusive with the field. +# copyfile: bool, optional +# If True, a hard link is created for the input file in the output directory. If +# hard link not possible, the file is copied to the output directory, by default +# it is False +# container_path: bool, optional +# If True a path will be consider as a path inside the container (and not as a +# local path, by default it is False +# output_file_template: str, optional +# If provided, the field is treated also as an output field and it is added to +# the output spec. The template can use other fields, e.g. {file1}. Used in order +# to create an output specification. +# output_field_name: str, optional +# If provided the field is added to the output spec with changed name. Used in +# order to create an output specification. Used together with output_file_template +# keep_extension: bool, optional +# A flag that specifies if the file extension should be removed from the field value. +# Used in order to create an output specification, by default it is True +# readonly: bool, optional +# If True the input field can’t be provided by the user but it aggregates other +# input fields (for example the fields with argstr: -o {fldA} {fldB}), by default +# it is False +# formatter: function, optional +# If provided the argstr of the field is created using the function. This function +# can for example be used to combine several inputs into one command argument. The +# function can take field (this input field will be passed to the function), +# inputs (entire inputs will be passed) or any input field name (a specific input +# field will be sent). 
+# **kwargs +# remaining keyword arguments are passed onto the underlying attrs.field function +# """ + +# metadata = { +# "help_string": help_string, +# "argstr": argstr, +# "position": position, +# "mandatory": mandatory, +# "sep": sep, +# "allowed_values": allowed_values, +# "requires": requires, +# "xor": xor, +# "copyfile": copyfile, +# "container_path": container_path, +# "output_file_template": output_file_template, +# "output_field_name": output_field_name, +# "keep_extension": keep_extension, +# "readonly": readonly, +# "formatter": formatter, +# } + +# return attrs.field( +# default=default, +# metadata={k: v for k, v in metadata.items() if v is not None}, +# **kwargs, +# ) + + +def out( + help_string: str, + mandatory: bool = False, + output_file_template: str = None, + output_field_name: str = None, + keep_extension: bool = True, + requires: list = None, + callable: ty.Callable = None, + **kwargs, +): + """Returns an attrs field with appropriate metadata for it to be added as an output of + a Pydra shell command task definition + + Parameters + ---------- + help_string: str + A short description of the input field. The same as in input_spec. + mandatory: bool, default: False + If True the output file has to exist, otherwise an error will be raised. + output_file_template: str, optional + If provided the output file name (or list of file names) is created using the + template. The template can use other fields, e.g. {file1}. The same as in + input_spec. + output_field_name: str, optional + If provided the field is added to the output spec with changed name. The same as + in input_spec. Used together with output_file_template + keep_extension: bool, default: True + A flag that specifies if the file extension should be removed from the field + value. The same as in input_spec. + requires: list + List of field names that are required to create a specific output. The fields + do not have to be a part of the output_file_template and if any field from the + list is not provided in the input, a NOTHING is returned for the specific output. + This has a different meaning than the requires form the input_spec. + callable: Callable + If provided the output file name (or list of file names) is created using the + function. The function can take field (the specific output field will be passed + to the function), output_dir (task output_dir will be used), stdout, stderr + (stdout and stderr of the task will be sent) inputs (entire inputs will be + passed) or any input field name (a specific input field will be sent). 
+ **kwargs + remaining keyword arguments are passed onto the underlying attrs.field function + """ + metadata = { + "help_string": help_string, + "mandatory": mandatory, + "output_file_template": output_file_template, + "output_field_name": output_field_name, + "keep_extension": keep_extension, + "requires": requires, + "callable": callable, + } + + return attrs.field( + metadata={k: v for k, v in metadata.items() if v is not None}, **kwargs + ) + + +def task( + klass_or_name: ty.Union[type, str], + executable: ty.Optional[str] = None, + input_fields: ty.Optional[dict[str, dict]] = None, + output_fields: ty.Optional[dict[str, dict]] = None, + bases: ty.Optional[list[type]] = None, + inputs_bases: ty.Optional[list[type]] = None, + outputs_bases: ty.Optional[list[type]] = None, +) -> type: + """ + Construct an analysis class and validate all the components fit together + + Parameters + ---------- + klass_or_name : type or str + Either the class decorated by the @shell_task decorator or the name for a + dynamically generated class + executable : str, optional + If dynamically constructing a class (instead of decorating an existing one) the + name of the executable to run is provided + input_fields : dict[str, dict], optional + If dynamically constructing a class (instead of decorating an existing one) the + input fields can be provided as a dictionary of dictionaries, where the keys + are the name of the fields and the dictionary contents are passed as keyword + args to cmd_arg, with the exception of "type", which is used as the type annotation + of the field. + output_fields : dict[str, dict], optional + If dynamically constructing a class (instead of decorating an existing one) the + output fields can be provided as a dictionary of dictionaries, where the keys + are the name of the fields and the dictionary contents are passed as keyword + args to cmd_out, with the exception of "type", which is used as the type annotation + of the field. 
+ bases : list[type] + Base classes for dynamically constructed shell command classes + inputs_bases : list[type] + Base classes for the input spec of dynamically constructed shell command classes + outputs_bases : list[type] + Base classes for the input spec of dynamically constructed shell command classes + + Returns + ------- + type + the shell command task class + """ + + annotations = { + "executable": str, + "Inputs": type, + "Outputs": type, + } + dct = {"__annotations__": annotations} + + if isinstance(klass_or_name, str): + # Dynamically created classes using shell_task as a function + name = klass_or_name + + if executable is not None: + dct["executable"] = executable + if input_fields is None: + input_fields = {} + if output_fields is None: + output_fields = {} + bases = list(bases) if bases is not None else [] + inputs_bases = list(inputs_bases) if inputs_bases is not None else [] + outputs_bases = list(outputs_bases) if outputs_bases is not None else [] + + # Ensure base classes included somewhere in MRO + def ensure_base_included(base_class: type, bases_list: list[type]): + if not any(issubclass(b, base_class) for b in bases_list): + bases_list.append(base_class) + + # Get inputs and outputs bases from base class if not explicitly provided + for base in bases: + if not inputs_bases: + try: + inputs_bases = [base.Inputs] + except AttributeError: + pass + if not outputs_bases: + try: + outputs_bases = [base.Outputs] + except AttributeError: + pass + + # Ensure bases are lists and can be modified + ensure_base_included(pydra.engine.task.ShellCommandTask, bases) + ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) + ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) + + def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): + annotations = {} + attrs_dict = {"__annotations__": annotations} + for name, dct in fields.items(): + kwargs = dict(dct) # copy to avoid modifying input to outer function + annotations[name] = kwargs.pop("type") + attrs_dict[name] = attrs_func(**kwargs) + return attrs_dict + + Inputs = attrs.define(kw_only=True, slots=False)( + type( + "Inputs", + tuple(inputs_bases), + convert_to_attrs(input_fields, arg), + ) + ) + + Outputs = attrs.define(kw_only=True, slots=False)( + type( + "Outputs", + tuple(outputs_bases), + convert_to_attrs(output_fields, out), + ) + ) + + else: + # Statically defined classes using shell_task as decorator + if ( + executable, + input_fields, + output_fields, + bases, + inputs_bases, + outputs_bases, + ) != (None, None, None, None, None, None): + raise RuntimeError( + "When used as a decorator on a class, `shell_task` should not be " + "provided any other arguments" + ) + klass = klass_or_name + name = klass.__name__ + + bases = [klass] + if not issubclass(klass, pydra.engine.task.ShellCommandTask): + bases.append(pydra.engine.task.ShellCommandTask) + + try: + executable = klass.executable + except AttributeError: + raise RuntimeError( + "Classes decorated by `shell_task` should contain an `executable` " + "attribute specifying the shell tool to run" + ) + try: + Inputs = klass.Inputs + except AttributeError: + raise RuntimeError( + "Classes decorated by `shell_task` should contain an `Inputs` class " + "attribute specifying the inputs to the shell tool" + ) + + try: + Outputs = klass.Outputs + except AttributeError: + Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) + + # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. 
+ # not in a base class) + if "Inputs" in klass.__dict__: + Inputs = attrs.define(kw_only=True, slots=False)(Inputs) + if "Outputs" in klass.__dict__: + Outputs = attrs.define(kw_only=True, slots=False)(Outputs) + + if not issubclass(Inputs, pydra.engine.specs.ShellSpec): + Inputs = attrs.define(kw_only=True, slots=False)( + type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) + ) + + template_fields = _gen_output_template_fields(Inputs, Outputs) + + if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): + outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) + add_base_class = True + else: + outputs_bases = (Outputs,) + add_base_class = False + + if add_base_class or template_fields: + Outputs = attrs.define(kw_only=True, slots=False)( + type("Outputs", outputs_bases, template_fields) + ) + + dct["Inputs"] = Inputs + dct["Outputs"] = Outputs + + task_klass = type(name, tuple(bases), dct) + + if not hasattr(task_klass, "executable"): + raise RuntimeError( + "Classes generated by `shell_task` should contain an `executable` " + "attribute specifying the shell tool to run" + ) + + task_klass.input_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) + ) + task_klass.output_spec = pydra.engine.specs.SpecInfo( + name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) + ) + + return task_klass + + +def _gen_output_template_fields(Inputs: type, Outputs: type) -> dict: + """Auto-generates output fields for inputs that specify an 'output_file_template' + + Parameters + ---------- + Inputs : type + Inputs specification class + Outputs : type + Outputs specification class + + Returns + ------- + template_fields: dict[str, attrs._make_CountingAttribute] + the template fields to add to the output spec + """ + annotations = {} + template_fields = {"__annotations__": annotations} + output_field_names = [f.name for f in attrs.fields(Outputs)] + for fld in attrs.fields(Inputs): + if "output_file_template" in fld.metadata: + if "output_field_name" in fld.metadata: + field_name = fld.metadata["output_field_name"] + else: + field_name = fld.name + # skip adding if the field already in the output_spec + exists_already = field_name in output_field_names + if not exists_already: + metadata = { + "help_string": fld.metadata["help_string"], + "mandatory": fld.metadata["mandatory"], + "keep_extension": fld.metadata["keep_extension"], + } + template_fields[field_name] = attrs.field(metadata=metadata) + annotations[field_name] = str + return template_fields diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py new file mode 100644 index 0000000000..ff6b8d7b0c --- /dev/null +++ b/pydra/design/tests/test_shell.py @@ -0,0 +1,467 @@ +import os +import tempfile +import attrs +from pathlib import Path +import pytest +import cloudpickle as cp +from pydra.design import shell + + +def list_entries(stdout): + return stdout.split("\n")[:-1] + + +@pytest.fixture +def tmpdir(): + return Path(tempfile.mkdtemp()) + + +@pytest.fixture(params=["static", "dynamic"]) +def Ls(request): + if request.param == "static": + + @shell + class Ls: + executable = "ls" + + class Inputs: + directory: os.PathLike = shell.arg( + help_string="the directory to list the contents of", + argstr="", + mandatory=True, + position=-1, + ) + hidden: bool = shell.arg( + help_string=("display hidden FS objects"), + argstr="-a", + default=False, + ) + long_format: bool = shell.arg( + help_string=( + "display properties of FS object, such as permissions, size and " + 
"timestamps " + ), + default=False, + argstr="-l", + ) + human_readable: bool = shell_arg( + help_string="display file sizes in human readable form", + argstr="-h", + default=False, + requires=["long_format"], + ) + complete_date: bool = shell_arg( + help_string="Show complete date in long format", + argstr="-T", + default=False, + requires=["long_format"], + xor=["date_format_str"], + ) + date_format_str: str = shell_arg( + help_string="format string for ", + argstr="-D", + default=attrs.NOTHING, + requires=["long_format"], + xor=["complete_date"], + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + elif request.param == "dynamic": + Ls = shell_task( + "Ls", + executable="ls", + input_fields={ + "directory": { + "type": os.PathLike, + "help_string": "the directory to list the contents of", + "argstr": "", + "mandatory": True, + "position": -1, + }, + "hidden": { + "type": bool, + "help_string": "display hidden FS objects", + "argstr": "-a", + }, + "long_format": { + "type": bool, + "help_string": ( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + "argstr": "-l", + }, + "human_readable": { + "type": bool, + "help_string": "display file sizes in human readable form", + "argstr": "-h", + "requires": ["long_format"], + }, + "complete_date": { + "type": bool, + "help_string": "Show complete date in long format", + "argstr": "-T", + "requires": ["long_format"], + "xor": ["date_format_str"], + }, + "date_format_str": { + "type": str, + "help_string": "format string for ", + "argstr": "-D", + "requires": ["long_format"], + "xor": ["complete_date"], + }, + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } + }, + ) + + else: + assert False + + return Ls + + +def test_shell_fields(Ls): + assert [a.name for a in attrs.fields(Ls.Inputs)] == [ + "executable", + "args", + "directory", + "hidden", + "long_format", + "human_readable", + "complete_date", + "date_format_str", + ] + + assert [a.name for a in attrs.fields(Ls.Outputs)] == [ + "return_code", + "stdout", + "stderr", + "entries", + ] + + +def test_shell_pickle_roundtrip(Ls, tmpdir): + pkl_file = tmpdir / "ls.pkl" + with open(pkl_file, "wb") as f: + cp.dump(Ls, f) + + with open(pkl_file, "rb") as f: + RereadLs = cp.load(f) + + assert RereadLs is Ls + + +def test_shell_run(Ls, tmpdir): + Path.touch(tmpdir / "a") + Path.touch(tmpdir / "b") + Path.touch(tmpdir / "c") + + ls = Ls(directory=tmpdir, long_format=True) + + # Test cmdline + assert ls.inputs.directory == tmpdir + assert not ls.inputs.hidden + assert ls.inputs.long_format + assert ls.cmdline == f"ls -l {tmpdir}" + + # Drop Long format flag to make output simpler + ls = Ls(directory=tmpdir) + result = ls() + + assert result.output.entries == ["a", "b", "c"] + + +@pytest.fixture(params=["static", "dynamic"]) +def A(request): + if request.param == "static": + + @shell_task + class A: + executable = "cp" + + class Inputs: + x: os.PathLike = shell_arg( + help_string="an input file", argstr="", position=0 + ) + y: str = shell_arg( + help_string="path of output file", + output_file_template="{x}_out", + argstr="", + ) + + elif request.param == "dynamic": + A = shell_task( + "A", + executable="cp", + input_fields={ + "x": { + "type": os.PathLike, + "help_string": "an input file", + "argstr": "", + "position": 0, + }, + "y": { + "type": str, + "help_string": "path of output file", 
+ "argstr": "", + "output_file_template": "{x}_out", + }, + }, + ) + else: + assert False + + return A + + +def test_shell_output_file_template(A): + assert "y" in [a.name for a in attrs.fields(A.Outputs)] + + +def test_shell_output_field_name_static(): + @shell_task + class A: + executable = "cp" + + class Inputs: + x: os.PathLike = shell_arg( + help_string="an input file", argstr="", position=0 + ) + y: str = shell_arg( + help_string="path of output file", + output_file_template="{x}_out", + output_field_name="y_out", + argstr="", + ) + + assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + + +def test_shell_output_field_name_dynamic(): + A = shell_task( + "A", + executable="cp", + input_fields={ + "x": { + "type": os.PathLike, + "help_string": "an input file", + "argstr": "", + "position": 0, + }, + "y": { + "type": str, + "help_string": "path of output file", + "argstr": "", + "output_field_name": "y_out", + "output_file_template": "{x}_out", + }, + }, + ) + + assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + + +def get_file_size(y: Path): + result = os.stat(y) + return result.st_size + + +def test_shell_bases_dynamic(A, tmpdir): + B = shell_task( + "B", + output_fields={ + "out_file_size": { + "type": int, + "help_string": "size of the output directory", + "callable": get_file_size, + } + }, + bases=[A], + ) + + xpath = tmpdir / "x.txt" + ypath = tmpdir / "y.txt" + Path.touch(xpath) + + b = B(x=xpath, y=str(ypath)) + + result = b() + + assert b.inputs.x == xpath + assert result.output.y == str(ypath) + + +def test_shell_bases_static(A, tmpdir): + @shell_task + class B(A): + class Outputs: + out_file_size: int = shell_out( + help_string="size of the output directory", callable=get_file_size + ) + + xpath = tmpdir / "x.txt" + ypath = tmpdir / "y.txt" + Path.touch(xpath) + + b = B(x=xpath, y=str(ypath)) + + result = b() + + assert b.inputs.x == xpath + assert result.output.y == str(ypath) + + +def test_shell_inputs_outputs_bases_dynamic(tmpdir): + A = shell_task( + "A", + "ls", + input_fields={ + "directory": { + "type": os.PathLike, + "help_string": "input directory", + "argstr": "", + "position": -1, + } + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } + }, + ) + B = shell_task( + "B", + "ls", + input_fields={ + "hidden": { + "type": bool, + "argstr": "-a", + "help_string": "show hidden files", + "default": False, + } + }, + bases=[A], + inputs_bases=[A.Inputs], + ) + + Path.touch(tmpdir / ".hidden") + + b = B(directory=tmpdir, hidden=True) + + assert b.inputs.directory == tmpdir + assert b.inputs.hidden + assert b.cmdline == f"ls -a {tmpdir}" + + result = b() + assert result.output.entries == [".", "..", ".hidden"] + + +def test_shell_inputs_outputs_bases_static(tmpdir): + @shell_task + class A: + executable = "ls" + + class Inputs: + directory: os.PathLike = shell_arg( + help_string="input directory", argstr="", position=-1 + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + @shell_task + class B(A): + class Inputs(A.Inputs): + hidden: bool = shell_arg( + help_string="show hidden files", + argstr="-a", + default=False, + ) + + Path.touch(tmpdir / ".hidden") + + b = B(directory=tmpdir, hidden=True) + + assert b.inputs.directory == tmpdir + assert b.inputs.hidden + + result = b() + assert result.output.entries == [".", "..", ".hidden"] + + +def test_shell_missing_executable_static(): 
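+    # `shell_task` requires decorated classes to define an `executable` attribute;
+    # omitting it should raise at class-construction time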
+ with pytest.raises(RuntimeError, match="should contain an `executable`"): + + @shell_task + class A: + class Inputs: + directory: os.PathLike = shell_arg( + help_string="input directory", argstr="", position=-1 + ) + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + +def test_shell_missing_executable_dynamic(): + with pytest.raises(RuntimeError, match="should contain an `executable`"): + A = shell_task( + "A", + executable=None, + input_fields={ + "directory": { + "type": os.PathLike, + "help_string": "input directory", + "argstr": "", + "position": -1, + } + }, + output_fields={ + "entries": { + "type": list, + "help_string": "list of entries returned by ls command", + "callable": list_entries, + } + }, + ) + + +def test_shell_missing_inputs_static(): + with pytest.raises(RuntimeError, match="should contain an `Inputs`"): + + @shell_task + class A: + executable = "ls" + + class Outputs: + entries: list = shell_out( + help_string="list of entries returned by ls command", + callable=list_entries, + ) + + +def test_shell_decorator_misuse(A): + with pytest.raises( + RuntimeError, match=("`shell_task` should not be provided any other arguments") + ): + shell_task(A, executable="cp") From e1b677e2fd793b4ee91389d50464189b2c261500 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 9 Nov 2024 23:46:15 +1100 Subject: [PATCH 017/342] implementing new decorator syntax --- pydra/design/__init__.py | 6 + pydra/design/base.py | 548 ++++++++++++++++++++++ pydra/design/python.py | 86 +++- pydra/design/shell.py | 3 +- pydra/design/tests/test_python.py | 318 +++++++++++++ pydra/design/tests/test_shell.py | 91 ++-- pydra/engine/__init__.py | 5 - pydra/engine/audit.py | 6 +- pydra/engine/boutiques.py | 6 +- pydra/engine/core.py | 54 +-- pydra/engine/helpers.py | 265 +++-------- pydra/engine/helpers_file.py | 6 +- pydra/engine/specs.py | 22 +- pydra/engine/submitter.py | 2 +- pydra/engine/task.py | 86 +++- pydra/engine/tests/test_helpers.py | 21 +- pydra/engine/tests/test_node_task.py | 4 +- pydra/engine/tests/test_numpy_examples.py | 4 +- pydra/engine/tests/test_profiles.py | 2 +- pydra/engine/tests/test_specs.py | 14 +- pydra/engine/tests/test_submitter.py | 6 +- pydra/engine/tests/test_task.py | 29 +- pydra/engine/tests/test_tasks_files.py | 2 +- pydra/engine/tests/test_workflow.py | 2 +- pydra/engine/tests/utils.py | 2 +- pydra/engine/workers.py | 18 +- pydra/mark/functions.py | 2 +- pydra/mark/tests/test_functions.py | 2 +- pydra/utils/tests/test_typing.py | 4 +- pydra/utils/typing.py | 4 +- 30 files changed, 1234 insertions(+), 386 deletions(-) create mode 100644 pydra/design/base.py create mode 100644 pydra/design/tests/test_python.py diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py index e69de29bb2..9135ecd93c 100644 --- a/pydra/design/__init__.py +++ b/pydra/design/__init__.py @@ -0,0 +1,6 @@ +from .base import Interface, fields +from . import python +from . 
import shell + + +__all__ = ["Interface", "fields", "python", "shell"] diff --git a/pydra/design/base.py b/pydra/design/base.py new file mode 100644 index 0000000000..d6b0f3f572 --- /dev/null +++ b/pydra/design/base.py @@ -0,0 +1,548 @@ +import typing as ty +import types +import inspect +import re +import enum +from copy import copy +import attrs.validators +from fileformats.generic import File +from pydra.utils.typing import TypeParser +from pydra.engine.helpers import from_list_if_single, ensure_list +from pydra.engine.specs import ( + LazyField, + MultiInputObj, + MultiInputFile, + MultiOutputObj, + MultiOutputFile, +) +from pydra.engine.core import Task, AuditFlag + +__all__ = [ + "Field", + "Arg", + "Out", + "Interface", + "collate_fields", + "make_interface", + "fields", +] + + +class _Empty(enum.Enum): + + EMPTY = enum.auto() + + def __repr__(self): + return "EMPTY" + + def __bool__(self): + return False + + +EMPTY = _Empty.EMPTY # To provide a blank placeholder for the default field + + +def is_type(_, __, val: ty.Any) -> bool: + """check that the value is a type or generic""" + return inspect.isclass(val) or ty.get_origin(val) + + +@attrs.define(kw_only=True) +class Field: + help_string: str = "" + mandatory: bool = False + name: str | None = None + type: ty.Type[ty.Any] | None = attrs.field(validator=is_type, default=ty.Any) + + +@attrs.define(kw_only=True) +class Arg(Field): + default: ty.Any = EMPTY + allowed_values: list = None + requires: list = None + xor: list = None + copy_mode: File.CopyMode = File.CopyMode.any + copy_collation: File.CopyCollation = File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single + readonly: bool = False + + +@attrs.define(kw_only=True) +class Out(Field): + requires: list = None + callable: ty.Callable = None + + +OutputType = ty.TypeVar("OutputType") + + +class Interface(ty.Generic[OutputType]): + + Task: ty.Type[Task] + + def __call__( + self, + interface, + name: str | None = None, + audit_flags: AuditFlag = AuditFlag.NONE, + cache_dir=None, + cache_locations=None, + inputs: ty.Text | File | dict[str, ty.Any] | None = None, + cont_dim=None, + messenger_args=None, + messengers=None, + rerun=False, + **kwargs, + ): + task = self.Task( + self, + interface, + name=name, + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + inputs=inputs, + cont_dim=cont_dim, + messenger_args=messenger_args, + messengers=messengers, + rerun=rerun, + ) + return task(**kwargs) + + +def collate_fields( + arg_type: type[Arg], + out_type: type[Out], + doc_string: str | None = None, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + input_helps: dict[str, str] | None = None, + output_helps: dict[str, str] | None = None, +) -> tuple[dict[str, Arg], dict[str, Out]]: + + if inputs is None: + inputs = [] + elif isinstance(inputs, list): + inputs = [ + a if isinstance(a, Arg) else arg_type(a, help_string=input_helps.get(a, "")) + for a in inputs + ] + elif isinstance(inputs, dict): + inputs_list = [] + for input_name, arg in inputs.items(): + if isinstance(arg, Arg): + if arg.name is None: + arg.name = input_name + elif arg.name != input_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. 
The argument name is {arg.name} and the key " + f"is {input_name}" + ) + else: + arg.name = input_name + if not arg.help_string: + arg.help_string = input_helps.get(input_name, "") + else: + arg = arg_type( + type=arg, + name=input_name, + help_string=input_helps.get(input_name, ""), + ) + inputs_list.append(arg) + inputs = inputs_list + + if outputs is None: + outputs = [] + elif isinstance(outputs, list): + outputs = [ + ( + o + if isinstance(o, Out) + else out_type(name=o, type=ty.Any, help_string=output_helps.get(o, "")) + ) + for o in outputs + ] + elif isinstance(outputs, dict): + for output_name, out in outputs.items(): + if isinstance(out, Out): + if out.name is None: + out.name = output_name + elif out.name != output_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. The argument name is {out.name} and the key " + f"is {output_name}" + ) + else: + out.name = output_name + if not out.help_string: + out.help_string = output_helps.get(output_name, "") + outputs = [ + ( + o + if isinstance(o, out_type) + else out_type(name=n, type=o, help_string=output_helps.get(n, "")) + ) + for n, o in outputs.items() + ] + + return inputs, outputs + + +def get_fields_from_class( + klass: type, + arg_type: type[Arg], + out_type: type[Out], + auto_attribs: bool, +) -> tuple[list[Field], list[Field]]: + """Parse the input and output fields from a class""" + + input_helps, _ = parse_doc_string(klass.__doc__) + + def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: + """Get the fields from a class""" + fields_dict = {} + for atr_name in dir(klass): + if atr_name.startswith("__"): + continue + try: + atr = getattr(klass, atr_name) + except Exception: + continue + if isinstance(atr, field_type): + atr.name = atr_name + fields_dict[atr_name] = atr + for atr_name, type_ in klass.__annotations__.items(): + try: + fields_dict[atr_name].type = type_ + except KeyError: + if auto_attribs: + fields_dict[atr_name] = field_type(name=atr_name, type=type_) + for atr_name, help in helps.items(): + try: + fields_dict[atr_name].help_string = help + except KeyError: + pass + return fields_dict.values() + + inputs = get_fields(klass, arg_type, auto_attribs, input_helps) + + outputs_klass = get_outputs_class(klass) + output_helps, _ = parse_doc_string(outputs_klass.__doc__) + if outputs_klass is None: + raise ValueError(f"Nested Outputs class not found in {klass.__name__}") + outputs = get_fields(outputs_klass, out_type, auto_attribs, output_helps) + + return inputs, outputs + + +def get_outputs_class(klass: type | None = None) -> type | None: + if klass is None: + return None + try: + outputs_klass = klass.Outputs + except AttributeError: + try: + interface_class = next( + b for b in klass.__mro__ if ty.get_origin(b) is Interface + ) + except StopIteration: + outputs_klass = None + else: + outputs_klass = ty.get_args(interface_class)[0] + return outputs_klass + + +def make_interface( + task_type: type[Task], + inputs: list[Arg], + outputs: list[Out], + klass: type | None = None, + name: str | None = None, +): + if name is None and klass is not None: + name = klass.__name__ + outputs_klass = get_outputs_class(klass) + if outputs_klass is None: + outputs_klass = type("Outputs", (), {}) + else: + # Ensure that the class has it's own annotaitons dict so we can modify it without + # messing up other classes + outputs_klass.__annotations__ = copy(outputs_klass.__annotations__) + # Now that we have saved the attributes in lists to be + for out in outputs: + 
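+        # attach each parsed Out as an attrs field on the Outputs class, keeping the
+        # original field definition in the attrs metadata so `fields()` can recover it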
setattr( + outputs_klass, + out.name, + attrs.field( + converter=get_converter(out, outputs_klass.__name__), + metadata={PYDRA_ATTR_METADATA: out}, + on_setattr=attrs.setters.convert, + ), + ) + outputs_klass.__annotations__[out.name] = out.type + outputs_klass = attrs.define(auto_attribs=False)(outputs_klass) + + if klass is None or not issubclass(klass, Interface): + if name is None: + raise ValueError("name must be provided if klass is not") + klass = types.new_class( + name=name, + bases=(Interface[outputs_klass],), + kwds={}, + exec_body=lambda ns: ns.update( + {"Task": task_type, "Outputs": outputs_klass} + ), + ) + else: + # Ensure that the class has it's own annotaitons dict so we can modify it without + # messing up other classes + klass.__annotations__ = copy(klass.__annotations__) + # Now that we have saved the attributes in lists to be + for arg in inputs: + setattr( + klass, + arg.name, + attrs.field( + default=arg.default if arg.default is not EMPTY else attrs.NOTHING, + converter=get_converter(arg, klass.__name__), + validator=get_validator(arg, klass.__name__), + metadata={PYDRA_ATTR_METADATA: arg}, + on_setattr=attrs.setters.convert, + ), + ) + klass.__annotations__[arg.name] = arg.type + + # Create class using attrs package, will create attributes for all columns and + # parameters + attrs_klass = attrs.define(auto_attribs=False)(klass) + + return attrs_klass + + +def get_converter(field: Field, interface_name: str): + checker_label = f"'{field.name}' field of {interface_name} interface" + type_checker = TypeParser[field.type]( + field.type, label=checker_label, superclass_auto_cast=True + ) + if field.type in (MultiInputObj, MultiInputFile): + converter = attrs.converters.pipe(ensure_list, type_checker) + elif field.type in (MultiOutputObj, MultiOutputFile): + converter = attrs.converters.pipe(from_list_if_single, type_checker) + else: + converter = type_checker + return converter + + +def get_validator(field: Field, interface_name: str): + if field.allowed_values: + if field._validator is None: + field._validator = allowed_values_validator + elif isinstance(field._validator, ty.Iterable): + if allowed_values_validator not in field._validator: + field._validator.append(allowed_values_validator) + elif field._validator is not allowed_values_validator: + field._validator = [ + field._validator, + allowed_values_validator, + ] + + +def allowed_values_validator(_, attribute, value): + """checking if the values is in allowed_values""" + allowed = attribute.metadata[PYDRA_ATTR_METADATA].allowed_values + if value is attrs.NOTHING or isinstance(value, LazyField): + pass + elif value not in allowed: + raise ValueError( + f"value of {attribute.name} has to be from {allowed}, but {value} provided" + ) + + +def extract_inputs_and_outputs_from_function( + function: ty.Callable, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, +) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: + """Extract input output types and output names from the function source if they + aren't explicitly""" + sig = inspect.signature(function) + input_types = { + p.name: (p.annotation if p.annotation is not inspect._empty else ty.Any) + for p in sig.parameters.values() + } + if inputs: + if not isinstance(inputs, dict): + raise ValueError( + f"Input names ({inputs}) should not be provided when " + "wrapping/decorating a function as " + ) + for inpt_name, type_ in input_types.items(): + try: + inpt = inputs[inpt_name] 
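+                # names missing from `inputs`, or Arg entries with no explicit type,
+                # fall back to the annotated type (handled in the except/else below)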
+ except KeyError: + inputs[inpt_name] = type_ + else: + if isinstance(inpt, Arg) and inpt.type is ty.Any: + inpt.type = type_ + else: + inputs = input_types + return_type = ( + sig.return_annotation if sig.return_annotation is not inspect._empty else ty.Any + ) + if outputs is None: + src = inspect.getsource(function).strip() + return_lines = re.findall(r"\n\s+return .*$", src) + if len(return_lines) == 1 and src.endswith(return_lines[0]): + implicit_outputs = [ + o.strip() + for o in re.match(r"\s*return\s+(.*)", return_lines[0]) + .group(1) + .split(",") + ] + if all(re.match(r"^\w+$", o) for o in implicit_outputs): + outputs = implicit_outputs + if isinstance(outputs, list) and len(outputs) > 1: + if return_type is not ty.Any: + if ty.get_origin(return_type) is not tuple: + raise ValueError( + f"Multiple outputs specified ({outputs}) but non-tuple " + f"return value {return_type}" + ) + return_types = ty.get_args(return_type) + if len(return_types) != len(outputs): + raise ValueError( + f"Length of the outputs ({outputs}) does not match that " + f"of the return types ({return_types})" + ) + outputs = dict(zip(outputs, return_types)) + elif not isinstance(outputs, dict): + if outputs: + if not isinstance(outputs, list): + raise ValueError( + f"Unrecognised format for outputs ({outputs}), should be a list " + "or dict" + ) + output_name = outputs[0] + else: + output_name = "out" + outputs = {output_name: return_type} + return inputs, outputs + + +def parse_doc_string(doc_str: str) -> tuple[dict[str, str], dict[str, str] | list[str]]: + """Parse the docstring to pull out the description of the parameters/args and returns + + Parameters + ----------- + doc_string + the doc string to parse + + Returns + ------- + input_helps + the documentation for each of the parameter/args of the class/function + output_helps + the documentation for each of the return values of the class function, if no + names are provided then the help strings are returned as a list + """ + input_helps = {} + output_helps = {} + if doc_str is None: + return input_helps, output_helps + for param, param_help in re.findall(r":param (\w+): (.*)", doc_str): + input_helps[param] = param_help + for return_val, return_help in re.findall(r":return (\w+): (.*)", doc_str): + output_helps[return_val] = return_help + google_args_match = re.match( + r".*\n\s+Args:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + google_returns_match = re.match( + r".*\n\s+Returns:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + if google_args_match: + args_str = google_args_match.group(1) + for arg_str in split_block(args_str): + arg_name, arg_help = arg_str.split(":", maxsplit=1) + arg_name = arg_name.strip() + arg_help = white_space_re.sub(" ", arg_help).strip() + input_helps[arg_name] = arg_help + if google_returns_match: + returns_str = google_returns_match.group(1) + for return_str in split_block(returns_str): + return_name, return_help = return_str.split(":", maxsplit=1) + return_name = return_name.strip() + return_help = white_space_re.sub(" ", return_help).strip() + output_helps[return_name] = return_help + numpy_args_match = re.match( + r".*\n\s+Parameters\n\s*---------- *\n(.*)", + doc_str, + flags=re.DOTALL | re.MULTILINE, + ) + numpy_returns_match = re.match( + r".*\n\s+Returns\n\s+------- *\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + ) + if numpy_args_match: + args_str = numpy_args_match.group(1) + for arg_str in split_block(args_str): + arg_decl, arg_help = arg_str.split("\n", maxsplit=1) + arg_name = 
arg_decl.split(":")[0].strip() + arg_help = white_space_re.sub(" ", arg_help).strip() + input_helps[arg_name] = arg_help + if numpy_returns_match: + returns_str = numpy_returns_match.group(1) + for return_str in split_block(returns_str): + return_decl, return_help = return_str.split("\n", maxsplit=1) + return_name = return_decl.split(":")[0].strip() + return_help = white_space_re.sub(" ", return_help).strip() + output_helps[return_name] = return_help + return input_helps, output_helps + + +def split_block(string: str) -> ty.Generator[str, None, None]: + """Split a block of text into groups lines""" + indent_re = re.compile(r"^\s*") + leading_indent = indent_re.match(string).group() + leading_indent_len = len(leading_indent) + block = "" + for line in string.split("\n"): + if not line.strip(): + break + indent_len = len(indent_re.match(line).group()) + if block and indent_len == leading_indent_len: + yield block.strip() + block = "" + block += line + "\n" + if indent_len < leading_indent_len: + raise ValueError( + f"Indentation block is not consistent in docstring:\n{string}" + ) + if block: + yield block.strip() + + +def fields(interface: Interface) -> list[Field]: + return [ + f.metadata[PYDRA_ATTR_METADATA] + for f in attrs.fields(interface) + if PYDRA_ATTR_METADATA in f.metadata + ] + + +def check_explicit_fields_are_none(klass, inputs, outputs): + if inputs is not None: + raise ValueError( + f"inputs should not be provided to `python.task` ({inputs}) " + f"explicitly when decorated a class ({klass})" + ) + if outputs is not None: + raise ValueError( + f"outputs should not be provided to `python.task` ({outputs}) " + f"explicitly when decorated a class ({klass})" + ) + + +white_space_re = re.compile(r"\s+") + +PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" diff --git a/pydra/design/python.py b/pydra/design/python.py index ed746fe01b..96561ac946 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -1,15 +1,79 @@ import typing as ty +import inspect import attrs +from pydra.engine.task import FunctionTask +from .base import ( + Arg, + Out, + collate_fields, + make_interface, + Interface, + parse_doc_string, + extract_inputs_and_outputs_from_function, + check_explicit_fields_are_none, + get_fields_from_class, +) -@attrs.define(kw_only=True) -class arg: - help_string: str - default: ty.Any = attrs.NOTHING - mandatory: bool = False - allowed_values: list = None - requires: list = None - xor: list = None - copyfile: bool = None - keep_extension: bool = True - readonly: bool = False +__all__ = ["arg", "out", "interface"] + + +@attrs.define +class arg(Arg): + pass + + +@attrs.define +class out(Out): + pass + + +def interface( + wrapped: ty.Callable | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + auto_attribs: bool = True, +) -> Interface: + + def make( + wrapped: ty.Callable | type | None = None, + ) -> Interface: + + if inspect.isclass(wrapped): + klass = wrapped + function = klass.function + name = klass.__name__ + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = get_fields_from_class( + klass, arg, out, auto_attribs + ) + else: + klass = None + function = wrapped + input_helps, output_helps = parse_doc_string(function.__doc__) + inferred_inputs, inferred_outputs = ( + extract_inputs_and_outputs_from_function(function, inputs, outputs) + ) + name = function.__name__ + + parsed_inputs, parsed_outputs = collate_fields( + arg_type=arg, 
+ out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + interface = make_interface( + FunctionTask, parsed_inputs, parsed_outputs, name=name, klass=klass + ) + # Set the function in the created class + interface.function = function + return interface + + if wrapped is not None: + if not isinstance(wrapped, ty.Callable): + raise ValueError(f"wrapped must be a callable, not {wrapped!r}") + return make(wrapped) + return make diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c26fa215e3..47fd162116 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -185,7 +185,7 @@ def out( ) -def task( +def interface( klass_or_name: ty.Union[type, str], executable: ty.Optional[str] = None, input_fields: ty.Optional[dict[str, dict]] = None, @@ -232,7 +232,6 @@ def task( annotations = { "executable": str, - "Inputs": type, "Outputs": type, } dct = {"__annotations__": annotations} diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py new file mode 100644 index 0000000000..1b011246f2 --- /dev/null +++ b/pydra/design/tests/test_python.py @@ -0,0 +1,318 @@ +from operator import attrgetter +import pytest +from pydra import design +from pydra.design.python import arg, out, interface +from pydra.engine.task import FunctionTask + + +def test_interface_wrap_function(): + def sample_interface(a: int) -> float: + """Sample function with inputs and outputs""" + return a * 2 + + SampleInterface = interface( + sample_interface, + inputs={"a": arg(help_string="The argument to be doubled")}, + outputs={"b": out(help_string="the doubled output")}, + ) + + assert issubclass(SampleInterface, design.Interface) + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="The argument to be doubled"), + ] + assert outputs == [ + out(name="b", type=float, help_string="the doubled output"), + ] + + +def test_interface_wrap_function_type(): + def sample_interface(a: int) -> int: + """Sample function with inputs and outputs""" + return a * 2 + + SampleInterface = interface( + sample_interface, + inputs={"a": float}, + outputs={"b": float}, + ) + + assert issubclass(SampleInterface, design.Interface) + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [arg(name="a", type=float)] + assert outputs == [out(name="b", type=float)] + + +def test_decorated_function_interface(): + @design.python.interface(outputs=["c", "d"]) + def SampleInterface(a: int, b: float) -> tuple[float, float]: + """Sample function for testing""" + return a + b, a * b + + assert issubclass(SampleInterface, design.Interface) + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int), + arg(name="b", type=float), + ] + assert outputs == [ + out(name="c", type=float), + out(name="d", type=float), + ] + assert SampleInterface.function.__name__ == "SampleInterface" + + +def test_interface_with_function_implicit_outputs_from_return_stmt(): + @design.python.interface + def SampleInterface(a: int, b: float) -> tuple[float, float]: + """Sample function for testing""" + c = a + b + d = 
a * b + return c, d + + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int), + arg(name="b", type=float), + ] + assert outputs == [ + out(name="c", type=float), + out(name="d", type=float), + ] + assert SampleInterface.function.__name__ == "SampleInterface" + + +def test_interface_with_function_docstr(): + @design.python.interface(outputs=["c", "d"]) + def SampleInterface(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + :param a: First input to be inputted + :param b: Second input + :return c: Sum of a and b + :return d: product of a and b + """ + return a + b, a * b + + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="First input to be inputted"), + arg(name="b", type=float, help_string="Second input"), + ] + assert outputs == [ + out(name="c", type=float, help_string="Sum of a and b"), + out(name="d", type=float, help_string="product of a and b"), + ] + assert SampleInterface.function.__name__ == "SampleInterface" + + +def test_interface_with_function_google_docstr(): + @design.python.interface(outputs=["c", "d"]) + def SampleInterface(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Args: + a: First input + to be inputted + b: Second input + + Returns: + c: Sum of a and b + d: Product of a and b + """ + return a + b, a * b + + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="First input to be inputted"), + arg(name="b", type=float, help_string="Second input"), + ] + assert outputs == [ + out(name="c", type=float, help_string="Sum of a and b"), + out(name="d", type=float, help_string="Product of a and b"), + ] + assert SampleInterface.function.__name__ == "SampleInterface" + + +def test_interface_with_function_numpy_docstr(): + @design.python.interface( + outputs=["c", "d"] + ) # Could potentiall read output names from doc-string instead + def SampleInterface(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Parameters + ---------- + a: int + First input + to be inputted + b: float + Second input + + Returns + ------- + c : int + Sum of a and b + d : float + Product of a and b + """ + return a + b, a * b + + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="First input to be inputted"), + arg(name="b", type=float, help_string="Second input"), + ] + assert outputs == [ + out(name="c", type=float, help_string="Sum of a and b"), + out(name="d", type=float, help_string="Product of a and b"), + ] + assert SampleInterface.function.__name__ == "SampleInterface" + + +def test_interface_with_class(): + @design.python.interface + class SampleInterface: + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float + + class Outputs: + """ 
+ Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + assert issubclass(SampleInterface, design.Interface) + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="First input to be inputted"), + arg(name="b", type=float, help_string="Second input"), + ] + assert outputs == [ + out(name="c", type=float, help_string="Sum of a and b"), + out(name="d", type=float, help_string="Product of a and b"), + ] + assert SampleInterface.function.__name__ == "function" + + +def test_interface_with_inheritance(): + @design.python.interface + class SampleInterface(design.Interface["SampleInterface.Outputs"]): + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float + + class Outputs: + """ + Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + assert issubclass(SampleInterface, design.Interface) + + +def test_interface_with_class_no_auto_attribs(): + @design.python.interface(auto_attribs=False) + class SampleInterface: + a: int = arg(help_string="First input to be inputted") + b: float = arg(help_string="Second input") + + x: int + + class Outputs: + c: float = out(help_string="Sum of a and b") + d: float = out(help_string="Product of a and b") + + y: str + + @staticmethod + def function(a, b): + return a + b, a * b + + assert SampleInterface.Task is FunctionTask + inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) + outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + assert inputs == [ + arg(name="a", type=int, help_string="First input to be inputted"), + arg(name="b", type=float, help_string="Second input"), + ] + assert outputs == [ + out(name="c", type=float, help_string="Sum of a and b"), + out(name="d", type=float, help_string="Product of a and b"), + ] + assert SampleInterface.function.__name__ == "function" + + +def test_interface_invalid_wrapped1(): + with pytest.raises(ValueError): + + @design.python.interface(inputs={"a": arg()}) + class SampleInterface(design.Interface["SampleInterface.Outputs"]): + a: int + + class Outputs: + b: float + + @staticmethod + def function(a): + return a + 1 + + +def test_interface_invalid_wrapped2(): + with pytest.raises(ValueError): + + @design.python.interface(outputs={"b": out()}) + class SampleInterface(design.Interface["SampleInterface.Outputs"]): + a: int + + class Outputs: + b: float + + @staticmethod + def function(a): + return a + 1 diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index ff6b8d7b0c..cca87179be 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -4,7 +4,7 @@ from pathlib import Path import pytest import cloudpickle as cp -from pydra.design import shell +from pydra.design import shell, Interface def list_entries(stdout): @@ -20,62 +20,61 @@ def tmpdir(): def Ls(request): if request.param == "static": - @shell + @shell.interface class Ls: executable = "ls" - class Inputs: - directory: os.PathLike = shell.arg( - help_string="the directory to list the contents of", - argstr="", - mandatory=True, - position=-1, - ) - hidden: bool = shell.arg( - help_string=("display hidden FS 
objects"), - argstr="-a", - default=False, - ) - long_format: bool = shell.arg( - help_string=( - "display properties of FS object, such as permissions, size and " - "timestamps " - ), - default=False, - argstr="-l", - ) - human_readable: bool = shell_arg( - help_string="display file sizes in human readable form", - argstr="-h", - default=False, - requires=["long_format"], - ) - complete_date: bool = shell_arg( - help_string="Show complete date in long format", - argstr="-T", - default=False, - requires=["long_format"], - xor=["date_format_str"], - ) - date_format_str: str = shell_arg( - help_string="format string for ", - argstr="-D", - default=attrs.NOTHING, - requires=["long_format"], - xor=["complete_date"], - ) + directory: os.PathLike = shell.arg( + help_string="the directory to list the contents of", + argstr="", + mandatory=True, + position=-1, + ) + hidden: bool = shell.arg( + help_string=("display hidden FS objects"), + argstr="-a", + default=False, + ) + long_format: bool = shell.arg( + help_string=( + "display properties of FS object, such as permissions, size and " + "timestamps " + ), + default=False, + argstr="-l", + ) + human_readable: bool = shell.arg( + help_string="display file sizes in human readable form", + argstr="-h", + default=False, + requires=["long_format"], + ) + complete_date: bool = shell.arg( + help_string="Show complete date in long format", + argstr="-T", + default=False, + requires=["long_format"], + xor=["date_format_str"], + ) + date_format_str: str = shell.arg( + help_string="format string for ", + argstr="-D", + default=attrs.NOTHING, + requires=["long_format"], + xor=["complete_date"], + ) class Outputs: - entries: list = shell_out( + entries: list = shell.out( help_string="list of entries returned by ls command", callable=list_entries, ) elif request.param == "dynamic": - Ls = shell_task( + Ls = shell.task( "Ls", executable="ls", - input_fields={ + inputs={ "directory": { "type": os.PathLike, "help_string": "the directory to list the contents of", @@ -117,7 +116,7 @@ class Outputs: "xor": ["complete_date"], }, }, - output_fields={ + outputs={ "entries": { "type": list, "help_string": "list of entries returned by ls command", diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 2eca36ba28..3674736c83 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -2,13 +2,8 @@ from .submitter import Submitter from .core import Workflow -from .task import AuditFlag, ShellCommandTask -from . 
import specs __all__ = [ - "AuditFlag", - "ShellCommandTask", "Submitter", "Workflow", - "specs", ] diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 7397fad6e6..2db771da65 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -3,8 +3,8 @@ import os import json import attr -from ..utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag -from ..utils.hash import hash_function +from pydra.utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag +from pydra.utils.hash import hash_function from .helpers import ensure_list, gather_runtime_info from .specs import attr_fields from fileformats.core import FileSet @@ -70,7 +70,7 @@ def start_audit(self, odir): if self.audit_check(AuditFlag.PROV): self.audit_message(start_message, AuditFlag.PROV) if self.audit_check(AuditFlag.RESOURCE): - from ..utils.profiler import ResourceMonitor + from pydra.utils.profiler import ResourceMonitor self.resource_monitor = ResourceMonitor(os.getpid(), logdir=self.odir) diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py index 0f3cf110e1..65ca0b727e 100644 --- a/pydra/engine/boutiques.py +++ b/pydra/engine/boutiques.py @@ -5,9 +5,9 @@ from pathlib import Path from functools import reduce -from ..utils.messenger import AuditFlag -from ..engine import ShellCommandTask -from ..engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attr_fields +from pydra.utils.messenger import AuditFlag +from pydra.engine.task import ShellCommandTask +from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attr_fields from .helpers_file import is_local_file diff --git a/pydra/engine/core.py b/pydra/engine/core.py index d0081e3ace..c0fb2ad17e 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -33,7 +33,7 @@ StateArray, ) from .helpers import ( - make_klass, + # make_klass, create_checksum, print_help, load_result, @@ -43,12 +43,11 @@ PydraFileLock, parse_copyfile, ) -from ..utils.hash import hash_function +from pydra.utils.hash import hash_function from .helpers_file import copy_nested_files, template_update from .graph import DiGraph from .audit import Audit -from ..utils.messenger import AuditFlag -from ..utils.typing import TypeParser +from pydra.utils.messenger import AuditFlag from fileformats.core import FileSet logger = logging.getLogger("pydra") @@ -56,7 +55,7 @@ develop = False -class TaskBase: +class Task: """ A base structure for the nodes in the processing graph. @@ -86,7 +85,8 @@ class TaskBase: def __init__( self, - name: str, + interface, + name: str | None = None, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cache_locations=None, @@ -135,28 +135,28 @@ def __init__( """ from .. 
import check_latest_version - if TaskBase._etelemetry_version_data is None: - TaskBase._etelemetry_version_data = check_latest_version() + if Task._etelemetry_version_data is None: + Task._etelemetry_version_data = check_latest_version() + self.interface = interface # raise error if name is same as of attributes if name in dir(self): raise ValueError("Cannot use names of attributes or methods as task name") self.name = name if not self.input_spec: raise Exception("No input_spec in class: %s" % self.__class__.__name__) - klass = make_klass(self.input_spec) - self.inputs = klass( + self.inputs = self.interface( **{ # in attrs names that starts with "_" could be set when name provided w/o "_" (f.name[1:] if f.name.startswith("_") else f.name): f.default - for f in attr.fields(klass) + for f in attr.fields(self.interface) } ) self.input_names = [ field.name - for field in attr.fields(klass) + for field in attr.fields(self.interface) if field.name not in ["_func", "_graph_checksums"] ] @@ -215,8 +215,7 @@ def __str__(self): def __getstate__(self): state = self.__dict__.copy() - state["input_spec"] = cp.dumps(state["input_spec"]) - state["output_spec"] = cp.dumps(state["output_spec"]) + state["interface"] = cp.dumps(state["interface"]) inputs = {} for k, v in attr.asdict(state["inputs"], recurse=False).items(): if k.startswith("_"): @@ -226,9 +225,8 @@ def __getstate__(self): return state def __setstate__(self, state): - state["input_spec"] = cp.loads(state["input_spec"]) - state["output_spec"] = cp.loads(state["output_spec"]) - state["inputs"] = make_klass(state["input_spec"])(**state["inputs"]) + state["interface"] = cp.loads(state["interface"]) + state["inputs"] = self.interface(**state["inputs"]) self.__dict__.update(state) @cached_property @@ -348,7 +346,7 @@ def output_names(self): """Get the names of the outputs from the task's output_spec (not everything has to be generated, see generated_output_names). """ - return [f.name for f in attr.fields(make_klass(self.output_spec))] + return [f.name for f in attr.fields(self.interface.Outputs)] @property def generated_output_names(self): @@ -357,7 +355,7 @@ def generated_output_names(self): it uses output_names. The results depends on the input provided to the task """ - output_klass = make_klass(self.output_spec) + output_klass = self.interface.Outputs if hasattr(output_klass, "generated_output_names"): output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} @@ -474,6 +472,8 @@ def _modify_inputs(self): execution (they will be replaced after the task's execution with the original inputs to ensure the tasks checksums are consistent) """ + from pydra.utils.typing import TypeParser + orig_inputs = { k: v for k, v in attr.asdict(self.inputs, recurse=False).items() @@ -583,7 +583,7 @@ def _run(self, rerun=False, environment=None, **kwargs): return result def _collect_outputs(self, output_dir): - output_klass = make_klass(self.output_spec) + output_klass = self.interface.Outputs output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) @@ -1030,7 +1030,7 @@ def _sanitize_spec( raise ValueError(f'Empty "{spec_name}" spec provided to Workflow {wf_name}.') -class Workflow(TaskBase): +class Workflow(Task): """A composite task with structure of computational graph.""" def __init__( @@ -1353,7 +1353,7 @@ def set_output( single or list of tuples linking the name of the output to a lazy output of a task in the workflow. 
""" - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser if self._connections is None: self._connections = [] @@ -1372,9 +1372,7 @@ def set_output( # checking if a new output name is already in the connections connection_names = [name for name, _ in self._connections] if self.output_spec: - output_types = { - a.name: a.type for a in attr.fields(make_klass(self.output_spec)) - } + output_types = {a.name: a.type for a in attr.fields(self.interface.Outputs)} else: output_types = {} # Check for type matches with explicitly defined outputs @@ -1409,12 +1407,12 @@ def set_output( help_string = f"all outputs from {task_nm}" fields.append((wf_out_nm, dict, {"help_string": help_string})) else: - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser # getting information about the output field from the task output_spec # providing proper type and some help string task_output_spec = getattr(self, task_nm).output_spec - out_fld = attr.fields_dict(make_klass(task_output_spec))[task_out_nm] + out_fld = attr.fields_dict(task_output_spec)[task_out_nm] help_string = ( f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" ) @@ -1427,7 +1425,7 @@ def set_output( logger.info("Added %s to %s", self.output_spec, self) def _collect_outputs(self): - output_klass = make_klass(self.output_spec) + output_klass = self.interface.Outputs output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 2aa88d9bc9..f443a6fe69 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -7,13 +7,11 @@ import sys from uuid import uuid4 import getpass -import typing as ty import subprocess as sp import re from time import strftime from traceback import format_exception import attr -import attrs # New defaults from filelock import SoftFileLock, Timeout import cloudpickle as cp from .specs import ( @@ -21,46 +19,12 @@ attr_fields, Result, LazyField, - File, ) from .helpers_file import copy_nested_files -from ..utils.typing import TypeParser from fileformats.core import FileSet -from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile -def ensure_list(obj, tuple2list=False): - """ - Return a list whatever the input object is. 
- - Examples - -------- - >>> ensure_list(list("abc")) - ['a', 'b', 'c'] - >>> ensure_list("abc") - ['abc'] - >>> ensure_list(tuple("abc")) - [('a', 'b', 'c')] - >>> ensure_list(tuple("abc"), tuple2list=True) - ['a', 'b', 'c'] - >>> ensure_list(None) - [] - >>> ensure_list(5.0) - [5.0] - - """ - if obj is attr.NOTHING: - return attr.NOTHING - if obj is None: - return [] - # list or numpy.array (this might need some extra flag in case an array has to be converted) - elif isinstance(obj, list) or hasattr(obj, "__array__"): - return obj - elif tuple2list and isinstance(obj, tuple): - return list(obj) - elif isinstance(obj, LazyField): - return obj - return [obj] +# from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile def from_list_if_single(obj): @@ -78,10 +42,9 @@ def from_list_if_single(obj): def print_help(obj): """Visit a task object and print its input/output interface.""" lines = [f"Help for {obj.__class__.__name__}"] - input_klass = make_klass(obj.input_spec) - if attr.fields(input_klass): + if attr.fields(obj.interface): lines += ["Input Parameters:"] - for f in attr.fields(input_klass): + for f in attr.fields(obj.interface): default = "" if f.default != attr.NOTHING and not f.name.startswith("_"): default = f" (default: {f.default})" @@ -90,7 +53,7 @@ def print_help(obj): except AttributeError: name = str(f.type) lines += [f"- {f.name}: {name}{default}"] - output_klass = make_klass(obj.output_spec) + output_klass = obj.interface.Outputs if attr.fields(output_klass): lines += ["Output Parameters:"] for f in attr.fields(output_klass): @@ -214,95 +177,6 @@ def gather_runtime_info(fname): return runtime -def make_klass(spec): - """ - Create a data class given a spec. - - Parameters - ---------- - spec : - TODO - - """ - if spec is None: - return None - fields = spec.fields - if fields: - newfields = {} - for item in fields: - if len(item) == 2: - name = item[0] - if isinstance(item[1], attr._make._CountingAttr): - newfield = item[1] - else: - newfield = attr.ib(type=item[1]) - else: - if ( - any([isinstance(ii, attr._make._CountingAttr) for ii in item]) - or len(item) > 4 - ): - raise ValueError( - "syntax not valid, you can use (name, attr), " - "(name, type, default), (name, type, default, metadata)" - "or (name, type, metadata)" - ) - kwargs = {} - if len(item) == 3: - name, tp = item[:2] - if isinstance(item[-1], dict) and "help_string" in item[-1]: - mdata = item[-1] - kwargs["metadata"] = mdata - else: - kwargs["default"] = item[-1] - elif len(item) == 4: - name, tp, dflt, mdata = item - kwargs["default"] = dflt - kwargs["metadata"] = mdata - newfield = attr.ib( - type=tp, - **kwargs, - ) - checker_label = f"'{name}' field of {spec.name}" - type_checker = TypeParser[newfield.type]( - newfield.type, label=checker_label, superclass_auto_cast=True - ) - if newfield.type in (MultiInputObj, MultiInputFile): - converter = attr.converters.pipe(ensure_list, type_checker) - elif newfield.type in (MultiOutputObj, MultiOutputFile): - converter = attr.converters.pipe(from_list_if_single, type_checker) - else: - converter = type_checker - newfield.converter = converter - newfield.on_setattr = attr.setters.convert - if "allowed_values" in newfield.metadata: - if newfield._validator is None: - newfield._validator = allowed_values_validator - elif isinstance(newfield._validator, ty.Iterable): - if allowed_values_validator not in newfield._validator: - newfield._validator.append(allowed_values_validator) - elif newfield._validator is not allowed_values_validator: - 
newfield._validator = [ - newfield._validator, - allowed_values_validator, - ] - newfields[name] = newfield - fields = newfields - return attrs.make_class( - spec.name, fields, bases=spec.bases, kw_only=True, on_setattr=None - ) - - -def allowed_values_validator(_, attribute, value): - """checking if the values is in allowed_values""" - allowed = attribute.metadata["allowed_values"] - if value is attr.NOTHING or isinstance(value, LazyField): - pass - elif value not in allowed: - raise ValueError( - f"value of {attribute.name} has to be from {allowed}, but {value} provided" - ) - - async def read_stream_and_display(stream, display): """ Read from stream line by line until EOF, display, and capture the lines. @@ -482,36 +356,36 @@ def get_open_loop(): return loop -def output_from_inputfields(output_spec, input_spec): - """ - Collect values from output from input fields. - If names_only is False, the output_spec is updated, - if names_only is True only the names are returned - - Parameters - ---------- - output_spec : - TODO - input_spec : - TODO - - """ - current_output_spec_names = [f.name for f in attr.fields(make_klass(output_spec))] - new_fields = [] - for fld in attr.fields(make_klass(input_spec)): - if "output_file_template" in fld.metadata: - if "output_field_name" in fld.metadata: - field_name = fld.metadata["output_field_name"] - else: - field_name = fld.name - # not adding if the field already in the output_spec - if field_name not in current_output_spec_names: - # TODO: should probably remove some of the keys - new_fields.append( - (field_name, attr.ib(type=File, metadata=fld.metadata)) - ) - output_spec.fields += new_fields - return output_spec +# def output_from_inputfields(interface: "Interface"): +# """ +# Collect values from output from input fields. +# If names_only is False, the output_spec is updated, +# if names_only is True only the names are returned + +# Parameters +# ---------- +# output_spec : +# TODO +# input_spec : +# TODO + +# """ +# current_output_spec_names = [f.name for f in attr.fields(interface.Outputs)] +# new_fields = [] +# for fld in attr.fields(interface): +# if "output_file_template" in fld.metadata: +# if "output_field_name" in fld.metadata: +# field_name = fld.metadata["output_field_name"] +# else: +# field_name = fld.name +# # not adding if the field already in the output_spec +# if field_name not in current_output_spec_names: +# # TODO: should probably remove some of the keys +# new_fields.append( +# (field_name, attr.ib(type=File, metadata=fld.metadata)) +# ) +# output_spec.fields += new_fields +# return output_spec def get_available_cpus(): @@ -640,43 +514,6 @@ def position_sort(args): return [arg for _, arg in pos] + none + [arg for _, arg in neg] -def argstr_formatting(argstr, inputs, value_updates=None): - """formatting argstr that have form {field_name}, - using values from inputs and updating with value_update if provided - """ - inputs_dict = attr.asdict(inputs, recurse=False) - # if there is a value that has to be updated (e.g. single value from a list) - if value_updates: - inputs_dict.update(value_updates) - # getting all fields that should be formatted, i.e. {field_name}, ... 
- inp_fields = parse_format_string(argstr) - val_dict = {} - for fld_name in inp_fields: - fld_value = inputs_dict[fld_name] - fld_attr = getattr(attrs.fields(type(inputs)), fld_name) - if fld_value is attr.NOTHING or ( - fld_value is False - and fld_attr.type is not bool - and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) - ): - # if value is NOTHING, nothing should be added to the command - val_dict[fld_name] = "" - else: - val_dict[fld_name] = fld_value - - # formatting string based on the val_dict - argstr_formatted = argstr.format(**val_dict) - # removing extra commas and spaces after removing the field that have NOTHING - argstr_formatted = ( - argstr_formatted.replace("[ ", "[") - .replace(" ]", "]") - .replace("[,", "[") - .replace(",]", "]") - .strip() - ) - return argstr_formatted - - class PydraFileLock: """Wrapper for filelock's SoftFileLock that makes it work with asyncio.""" @@ -755,3 +592,37 @@ def parse_format_string(fmtstr): all_keywords = re.findall(full_field, fmtstr) return set().union(*all_keywords) - {""} + + +def ensure_list(obj, tuple2list=False): + """ + Return a list whatever the input object is. + + Examples + -------- + >>> ensure_list(list("abc")) + ['a', 'b', 'c'] + >>> ensure_list("abc") + ['abc'] + >>> ensure_list(tuple("abc")) + [('a', 'b', 'c')] + >>> ensure_list(tuple("abc"), tuple2list=True) + ['a', 'b', 'c'] + >>> ensure_list(None) + [] + >>> ensure_list(5.0) + [5.0] + + """ + if obj is attr.NOTHING: + return attr.NOTHING + if obj is None: + return [] + # list or numpy.array (this might need some extra flag in case an array has to be converted) + elif isinstance(obj, list) or hasattr(obj, "__array__"): + return obj + elif tuple2list and isinstance(obj, tuple): + return list(obj) + elif isinstance(obj, LazyField): + return obj + return [obj] diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index f194533ac7..8be955b20e 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -72,7 +72,7 @@ def copy_nested_files( **kwargs passed directly onto FileSet.copy() """ - from ..utils.typing import TypeParser # noqa + from pydra.utils.typing import TypeParser # noqa cache: ty.Dict[FileSet, FileSet] = {} @@ -150,7 +150,7 @@ def template_update_single( """ # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object - from ..utils.typing import TypeParser # noqa + from pydra.utils.typing import TypeParser # noqa from pydra.engine.specs import LazyField, OUTPUT_TEMPLATE_TYPES if inputs_dict_st is None: @@ -329,7 +329,7 @@ def _element_formatting(template, values_template_dict, file_template, keep_exte def is_local_file(f): - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser return "container_path" not in f.metadata and TypeParser.contains_type( FileSet, f.type diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 691c6148c0..47a2f38686 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -15,9 +15,9 @@ ) import pydra from .helpers_file import template_update_single -from ..utils.hash import hash_function, Cache +from pydra.utils.hash import hash_function, Cache -# from ..utils.misc import add_exc_note +# from pydra.utils.misc import add_exc_note T = ty.TypeVar("T") @@ -356,7 +356,7 @@ def check_metadata(self): Also sets the default values when available and needed. 
""" - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser supported_keys = { "allowed_values", @@ -431,7 +431,7 @@ class ShellOutSpec: """The process' standard input.""" def collect_additional_outputs(self, inputs, output_dir, outputs): - from ..utils.typing import TypeParser + from pydra.utils.typing import TypeParser """Collect additional outputs from shelltask output_spec.""" additional_out = {} @@ -675,7 +675,7 @@ def _check_requires(self, fld, inputs): @attr.s class LazyInterface: - _task: "core.TaskBase" = attr.ib() + _task: "core.Task" = attr.ib() _attr_type: str def __getattr__(self, name): @@ -853,7 +853,9 @@ def split(self, splitter: Splitter) -> "LazyField": splitter : str or ty.Tuple[str, ...] or ty.List[str] the splitter to append to the list of splitters """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel splits = self.splits | set([LazyField.sanitize_splitter(splitter)]) # Check to see whether the field has already been split over the given splitter @@ -935,7 +937,9 @@ def get_value( value : Any the resolved value of the lazy-field """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel value = getattr(wf.inputs, self.field) if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: @@ -971,7 +975,9 @@ def get_value( value : Any the resolved value of the lazy-field """ - from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel node = getattr(wf, self.name) result = node.result(state_index=state_index) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index fe3e598c21..f92b996bd7 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -7,7 +7,7 @@ from .workers import Worker, WORKERS from .core import is_workflow from .helpers import get_open_loop, load_and_run_async -from ..utils.hash import PersistentCache +from pydra.utils.hash import PersistentCache import logging diff --git a/pydra/engine/task.py b/pydra/engine/task.py index b4970aae2c..b9317602bd 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -44,35 +44,36 @@ import platform import re import attr +import attrs +import warnings import inspect import typing as ty import shlex from pathlib import Path -import warnings import cloudpickle as cp from fileformats.core import FileSet, DataType -from .core import TaskBase, is_lazy -from ..utils.messenger import AuditFlag +from .core import Task, is_lazy +from pydra.utils.messenger import AuditFlag from .specs import ( BaseSpec, SpecInfo, - ShellSpec, - ShellOutSpec, + # ShellSpec, + # ShellOutSpec, attr_fields, ) from .helpers import ( - ensure_list, + parse_format_string, position_sort, - argstr_formatting, - output_from_inputfields, + ensure_list, + # output_from_inputfields, parse_copyfile, ) from .helpers_file import template_update -from ..utils.typing import TypeParser +from pydra.utils.typing import TypeParser from .environments import Native -class FunctionTask(TaskBase): +class FunctionTask(Task): """Wrap a Python callable as a task element.""" def __init__( @@ -200,7 +201,7 @@ def _run_task(self, environment=None): del inputs["_func"] self.output_ = None output = cp.loads(self.inputs._func)(**inputs) - 
output_names = [el[0] for el in self.output_spec.fields] + output_names = [f.name for f in attr.fields(self.interface.Outputs)] if output is None: self.output_ = {nm: None for nm in output_names} elif len(output_names) == 1: @@ -217,7 +218,7 @@ def _run_task(self, environment=None): ) -class ShellCommandTask(TaskBase): +class ShellCommandTask(Task): """Wrap a shell command as a task element.""" input_spec = None @@ -269,18 +270,18 @@ def __init__( if name is None: name = "ShellTask_noname" - # using provided spec, class attribute or setting the default SpecInfo - self.input_spec = ( - input_spec - or self.input_spec - or SpecInfo(name="Inputs", fields=[], bases=(ShellSpec,)) - ) - self.output_spec = ( - output_spec - or self.output_spec - or SpecInfo(name="Output", fields=[], bases=(ShellOutSpec,)) - ) - self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) + # # using provided spec, class attribute or setting the default SpecInfo + # self.input_spec = ( + # input_spec + # or self.input_spec + # or SpecInfo(name="Inputs", fields=[], bases=(ShellSpec,)) + # ) + # self.output_spec = ( + # output_spec + # or self.output_spec + # or SpecInfo(name="Output", fields=[], bases=(ShellOutSpec,)) + # ) + # self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) for special_inp in ["executable", "args"]: if hasattr(self, special_inp): @@ -590,3 +591,40 @@ def split_cmd(cmd: str): else: cmd_args.append(arg) return cmd_args + + +def argstr_formatting(argstr, inputs, value_updates=None): + """formatting argstr that have form {field_name}, + using values from inputs and updating with value_update if provided + """ + inputs_dict = attr.asdict(inputs, recurse=False) + # if there is a value that has to be updated (e.g. single value from a list) + if value_updates: + inputs_dict.update(value_updates) + # getting all fields that should be formatted, i.e. {field_name}, ... 
+ inp_fields = parse_format_string(argstr) + val_dict = {} + for fld_name in inp_fields: + fld_value = inputs_dict[fld_name] + fld_attr = getattr(attrs.fields(type(inputs)), fld_name) + if fld_value is attr.NOTHING or ( + fld_value is False + and fld_attr.type is not bool + and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) + ): + # if value is NOTHING, nothing should be added to the command + val_dict[fld_name] = "" + else: + val_dict[fld_name] = fld_value + + # formatting string based on the val_dict + argstr_formatted = argstr.format(**val_dict) + # removing extra commas and spaces after removing the field that have NOTHING + argstr_formatted = ( + argstr_formatted.replace("[ ", "[") + .replace(" ]", "]") + .replace("[,", "[") + .replace(",]", "]") + .strip() + ) + return argstr_formatted diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 48fd6e3120..0eb5c4156b 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -17,10 +17,9 @@ load_and_run, position_sort, parse_copyfile, - argstr_formatting, parse_format_string, ) -from ...utils.hash import hash_function +from pydra.utils.hash import hash_function from ..core import Workflow @@ -316,24 +315,6 @@ def mock_field(copyfile): parse_copyfile(mock_field((Mode.copy, 2))) -def test_argstr_formatting(): - @attrs.define - class Inputs: - a1_field: str - b2_field: float - c3_field: ty.Dict[str, str] - d4_field: ty.List[str] - - inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) - assert ( - argstr_formatting( - "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", - inputs, - ) - == "1 2.000000 -test 3 -me 4" - ) - - def test_parse_format_string1(): assert parse_format_string("{a}") == {"a"} diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index bceaf97402..728750594b 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -24,7 +24,7 @@ op_4var, ) -from ..core import TaskBase +from ..core import Task from ..specs import StateArray from ..submitter import Submitter @@ -46,7 +46,7 @@ def move2orig(): def test_task_init_1(): """task with mandatory arguments only""" nn = fun_addtwo() - assert isinstance(nn, TaskBase) + assert isinstance(nn, Task) assert nn.name == "fun_addtwo" assert hasattr(nn, "__call__") diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index defdad7a2b..e866987126 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -8,9 +8,9 @@ from ..submitter import Submitter from ..core import Workflow -from ...mark import task, annotate +from pydra.mark import task, annotate from .utils import identity -from ...utils.hash import hash_function, Cache +from pydra.utils.hash import hash_function if importlib.util.find_spec("numpy") is None: pytest.skip("can't find numpy library", allow_module_level=True) diff --git a/pydra/engine/tests/test_profiles.py b/pydra/engine/tests/test_profiles.py index f84f8d19f4..d3070ddeab 100644 --- a/pydra/engine/tests/test_profiles.py +++ b/pydra/engine/tests/test_profiles.py @@ -1,6 +1,6 @@ from ..core import Workflow from ..helpers import load_task -from ... 
import mark +from pydra import mark import numpy as np from pympler import asizeof diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 8221751d01..504d9fe71c 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -2,7 +2,8 @@ import typing as ty import os import attrs -from copy import deepcopy + +# from copy import deepcopy import time from ..specs import ( @@ -15,15 +16,20 @@ # ContainerSpec, LazyIn, LazyOut, - LazyField, + # LazyField, StateArray, ) -from ..helpers import make_klass + +# from ..helpers import make_klass from .utils import foo -from pydra import mark, Workflow +from pydra import mark +from pydra.engine import Workflow import pytest +make_klass = lambda x: x + + def test_basespec(): spec = BaseSpec() assert spec.hash == "0b1d98df22ecd1733562711c205abca2" diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 298e7e74b4..9d1cd9f3bd 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -17,10 +17,10 @@ gen_basic_wf_with_threadcount, gen_basic_wf_with_threadcount_concurrent, ) -from ..core import Workflow, TaskBase +from ..core import Workflow, Task from ..submitter import Submitter from ..workers import SerialWorker -from ... import mark +from pydra import mark from pathlib import Path from datetime import datetime @@ -685,7 +685,7 @@ def __init__(self, add_var, **kwargs): self.add_var = add_var async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): result = runnable._run(rerun, environment=environment) return result diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 0d666574e3..d434004e3b 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1,15 +1,16 @@ import typing as ty -import os, sys +import os +import sys import attr import pytest import cloudpickle as cp from pathlib import Path import json import glob as glob -from ... import mark +from pydra import mark +from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages from ..core import Workflow -from ..task import AuditFlag, ShellCommandTask -from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages +from ..task import AuditFlag, ShellCommandTask, argstr_formatting from .utils import gen_basic_wf from ..specs import ( MultiInputObj, @@ -20,7 +21,7 @@ ShellSpec, File, ) -from ...utils.hash import hash_function +from pydra.utils.hash import hash_function no_win = pytest.mark.skipif( @@ -1582,3 +1583,21 @@ def testfunc(a: A): result = testfunc(a=A(x=7))() assert result.output.out == 7 + + +def test_argstr_formatting(): + @attr.define + class Inputs: + a1_field: str + b2_field: float + c3_field: ty.Dict[str, str] + d4_field: ty.List[str] + + inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) + assert ( + argstr_formatting( + "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", + inputs, + ) + == "1 2.000000 -test 3 -me 4" + ) diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index a1849e221b..8d22a415e1 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -6,7 +6,7 @@ from ..submitter import Submitter from ..core import Workflow -from ... 
import mark +from pydra import mark from ..specs import File, Directory diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index c6aab6544f..3da1398c40 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -35,7 +35,7 @@ ) from ..submitter import Submitter from ..core import Workflow -from ... import mark +from pydra import mark from ..specs import SpecInfo, BaseSpec, ShellSpec from pydra.utils import exc_info_matches diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 5b0858866c..55a4ccb164 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -11,7 +11,7 @@ from ..core import Workflow from ..submitter import Submitter -from ... import mark +from pydra import mark need_docker = pytest.mark.skipif( diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index eaa40beb0a..30cf65ea86 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -10,7 +10,7 @@ import concurrent.futures as cf -from .core import TaskBase +from .core import Task from .helpers import ( get_available_cpus, read_and_display_async, @@ -142,7 +142,7 @@ def close(self): """Return whether the task is finished.""" async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): return runnable._run(rerun, environment=environment) else: # it could be tuple that includes pickle files with tasks and inputs ind, task_main_pkl, _ = runnable @@ -177,7 +177,7 @@ def run_el(self, runnable, rerun=False, environment=None, **kwargs): async def exec_as_coro(self, runnable, rerun=False, environment=None): """Run a task (coroutine wrapper).""" - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): res = await self.loop.run_in_executor( self.pool, runnable._run, rerun, environment ) @@ -228,7 +228,7 @@ def run_el(self, runnable, rerun=False, environment=None): script_dir, batch_script = self._prepare_runscripts(runnable, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): cache_dir = runnable.cache_dir name = runnable.name uid = runnable.uid @@ -240,7 +240,7 @@ def run_el(self, runnable, rerun=False, environment=None): return self._submit_job(batch_script, name=name, uid=uid, cache_dir=cache_dir) def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): - if isinstance(task, TaskBase): + if isinstance(task, Task): cache_dir = task.cache_dir ind = None uid = task.uid @@ -465,7 +465,7 @@ def run_el(self, runnable, rerun=False): # TODO: add env ) = self._prepare_runscripts(runnable, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): cache_dir = runnable.cache_dir name = runnable.name uid = runnable.uid @@ -486,7 +486,7 @@ def run_el(self, runnable, rerun=False): # TODO: add env ) def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): - if isinstance(task, TaskBase): + if isinstance(task, Task): cache_dir = task.cache_dir ind = None uid = task.uid @@ -890,7 +890,7 @@ async def exec_dask(self, runnable, rerun=False): from dask.distributed import Client async with Client(**self.client_args, asynchronous=True) as client: - if isinstance(runnable, TaskBase): + if 
isinstance(runnable, Task): future = client.submit(runnable._run, rerun) result = await future else: # it could be tuple that includes pickle files with tasks and inputs @@ -989,7 +989,7 @@ async def exec_psij(self, runnable, rerun=False): jex = self.psij.JobExecutor.get_instance(self.subtype) absolute_path = Path(__file__).parent - if isinstance(runnable, TaskBase): + if isinstance(runnable, Task): cache_dir = runnable.cache_dir file_path = cache_dir / "runnable_function.pkl" with open(file_path, "wb") as file: diff --git a/pydra/mark/functions.py b/pydra/mark/functions.py index e191a61809..d3bdaa9b03 100644 --- a/pydra/mark/functions.py +++ b/pydra/mark/functions.py @@ -40,7 +40,7 @@ def task(func): ... return a ** 2.0 """ - from ..engine.task import FunctionTask + from pydra.engine.task import FunctionTask @wraps(func) def decorate(**kwargs): diff --git a/pydra/mark/tests/test_functions.py b/pydra/mark/tests/test_functions.py index 4be0343f1f..2383ce0057 100644 --- a/pydra/mark/tests/test_functions.py +++ b/pydra/mark/tests/test_functions.py @@ -3,7 +3,7 @@ import typing as ty from ..functions import task, annotate -from ...engine.task import FunctionTask +from pydra.engine.task import FunctionTask def test_task_equivalence(): diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index f83eedbd8c..37002f727e 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -6,9 +6,9 @@ import tempfile import pytest from pydra import mark -from ...engine.specs import File, LazyOutField, MultiInputObj +from pydra.engine.specs import File, LazyOutField, MultiInputObj from ..typing import TypeParser -from pydra import Workflow +from pydra.engine import Workflow from fileformats.application import Json, Yaml, Xml from .utils import ( generic_func_task, diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index e40f928047..01b1d2f5f8 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -8,13 +8,13 @@ import typing as ty import logging import attr -from ..engine.specs import ( +from pydra.engine.specs import ( LazyField, StateArray, MultiInputObj, MultiOutputObj, ) -from ..utils import add_exc_note +from pydra.utils import add_exc_note from fileformats import field try: From 1a0ffff2ef032cd0b72e1a7bbda6a7659ad9d0b6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 10 Nov 2024 00:26:57 +1100 Subject: [PATCH 018/342] got all new python interface unittests to pass --- pydra/design/base.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index d6b0f3f572..51fbf6c160 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -403,7 +403,7 @@ def extract_inputs_and_outputs_from_function( ] if all(re.match(r"^\w+$", o) for o in implicit_outputs): outputs = implicit_outputs - if isinstance(outputs, list) and len(outputs) > 1: + if len(outputs) > 1: if return_type is not ty.Any: if ty.get_origin(return_type) is not tuple: raise ValueError( @@ -416,18 +416,24 @@ def extract_inputs_and_outputs_from_function( f"Length of the outputs ({outputs}) does not match that " f"of the return types ({return_types})" ) - outputs = dict(zip(outputs, return_types)) - elif not isinstance(outputs, dict): - if outputs: - if not isinstance(outputs, list): - raise ValueError( - f"Unrecognised format for outputs ({outputs}), should be a list " - "or dict" - ) - output_name = outputs[0] + output_types = dict(zip(outputs, return_types)) + if isinstance(outputs, 
dict): + for output_name, output in outputs.items(): + if isinstance(output, Out) and output.type is ty.Any: + output.type = output_types[output_name] else: - output_name = "out" - outputs = {output_name: return_type} + outputs = output_types + + elif outputs: + output_name, output = next(iter(outputs.items())) + if isinstance(output, Out): + if output.type is ty.Any: + output.type = return_type + elif output is ty.Any: + output = return_type + outputs = {output_name: output} + else: + outputs = {"out": return_type} return inputs, outputs From 3a159bb661d2419cd132f8d99b4c395859fff416 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 11 Nov 2024 18:44:34 +1100 Subject: [PATCH 019/342] fixed up checking of wrapped type in python.task decorator --- pydra/design/python.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/pydra/design/python.py b/pydra/design/python.py index 96561ac946..bda4b89363 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -29,17 +29,28 @@ class out(Out): def interface( - wrapped: ty.Callable | None = None, + wrapped: type | ty.Callable | None = None, /, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, outputs: list[str | Out] | dict[str, Out | type] | type | None = None, auto_attribs: bool = True, ) -> Interface: + """ + Create an interface for a function or a class. - def make( - wrapped: ty.Callable | type | None = None, - ) -> Interface: + Parameters + ---------- + wrapped : type | callable | None + The function or class to create an interface for. + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function or class. + outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function or class. + auto_attribs : bool + Whether to use auto_attribs mode when creating the class. + """ + def make(wrapped: ty.Callable | type) -> Interface: if inspect.isclass(wrapped): klass = wrapped function = klass.function @@ -49,6 +60,10 @@ def make( klass, arg, out, auto_attribs ) else: + if not inspect.isfunction(wrapped): + raise ValueError( + f"wrapped must be a class or a function, not {wrapped!r}" + ) klass = None function = wrapped input_helps, output_helps = parse_doc_string(function.__doc__) @@ -73,7 +88,7 @@ def make( return interface if wrapped is not None: - if not isinstance(wrapped, ty.Callable): - raise ValueError(f"wrapped must be a callable, not {wrapped!r}") + if not isinstance(wrapped, (ty.Callable, type)): + raise ValueError(f"wrapped must be a class or a callable, not {wrapped!r}") return make(wrapped) return make From dabe2ef25c9452e28d3365e5a047c9100663005d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 11 Nov 2024 18:45:24 +1100 Subject: [PATCH 020/342] added shell.task decorator implementation and working through tests --- pydra/design/__init__.py | 4 +- pydra/design/base.py | 124 +++- pydra/design/shell.py | 976 ++++++++++++++++++------------ pydra/design/tests/test_python.py | 59 +- pydra/design/tests/test_shell.py | 375 ++++++------ 5 files changed, 947 insertions(+), 591 deletions(-) diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py index 9135ecd93c..69f789ca66 100644 --- a/pydra/design/__init__.py +++ b/pydra/design/__init__.py @@ -1,6 +1,6 @@ -from .base import Interface, fields +from .base import Interface, list_fields from . import python from . 
import shell -__all__ = ["Interface", "fields", "python", "shell"] +__all__ = ["Interface", "list_fields", "python", "shell"] diff --git a/pydra/design/base.py b/pydra/design/base.py index 51fbf6c160..c9d96d4140 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -5,6 +5,7 @@ import enum from copy import copy import attrs.validators +from attrs.converters import default_if_none from fileformats.generic import File from pydra.utils.typing import TypeParser from pydra.engine.helpers import from_list_if_single, ensure_list @@ -24,7 +25,7 @@ "Interface", "collate_fields", "make_interface", - "fields", + "list_fields", ] @@ -49,18 +50,77 @@ def is_type(_, __, val: ty.Any) -> bool: @attrs.define(kw_only=True) class Field: + """Base class for input and output fields to Pydra tasks + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help_string: str, optional + A short description of the input field. + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + requires: list, optional + List of field names that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + name: str | None = None + type: ty.Type[ty.Any] = attrs.field( + validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) + ) help_string: str = "" mandatory: bool = False - name: str | None = None - type: ty.Type[ty.Any] | None = attrs.field(validator=is_type, default=ty.Any) + requires: list | None = None + converter: ty.Callable | None = None + validator: ty.Callable | None = None @attrs.define(kw_only=True) class Arg(Field): + """Base class for input fields to Pydra tasks + + Parameters + ---------- + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + List of field names that are required together with the field. + xor: list, optional + List of field names that are mutually exclusive with the field. 
+ copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + """ + default: ty.Any = EMPTY - allowed_values: list = None - requires: list = None - xor: list = None + allowed_values: list | None = None + xor: list | None = None copy_mode: File.CopyMode = File.CopyMode.any copy_collation: File.CopyCollation = File.CopyCollation.any copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single @@ -69,8 +129,7 @@ class Arg(Field): @attrs.define(kw_only=True) class Out(Field): - requires: list = None - callable: ty.Callable = None + pass OutputType = ty.TypeVar("OutputType") @@ -118,7 +177,7 @@ def collate_fields( outputs: list[str | Out] | dict[str, Out | type] | type | None = None, input_helps: dict[str, str] | None = None, output_helps: dict[str, str] | None = None, -) -> tuple[dict[str, Arg], dict[str, Out]]: +) -> tuple[list[Arg], list[Out]]: if inputs is None: inputs = [] @@ -210,7 +269,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: atr = getattr(klass, atr_name) except Exception: continue - if isinstance(atr, field_type): + if isinstance(atr, Field): atr.name = atr_name fields_dict[atr_name] = atr for atr_name, type_ in klass.__annotations__.items(): @@ -224,7 +283,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: fields_dict[atr_name].help_string = help except KeyError: pass - return fields_dict.values() + return list(fields_dict.values()) inputs = get_fields(klass, arg_type, auto_attribs, input_helps) @@ -261,13 +320,15 @@ def make_interface( klass: type | None = None, name: str | None = None, ): + assert isinstance(inputs, list) + assert isinstance(outputs, list) if name is None and klass is not None: name = klass.__name__ outputs_klass = get_outputs_class(klass) if outputs_klass is None: outputs_klass = type("Outputs", (), {}) else: - # Ensure that the class has it's own annotaitons dict so we can modify it without + # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes outputs_klass.__annotations__ = copy(outputs_klass.__annotations__) # Now that we have saved the attributes in lists to be @@ -282,7 +343,7 @@ def make_interface( ), ) outputs_klass.__annotations__[out.name] = out.type - outputs_klass = attrs.define(auto_attribs=False)(outputs_klass) + outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) if klass is None or not issubclass(klass, Interface): if name is None: @@ -299,6 +360,8 @@ def make_interface( # Ensure that the class has it's own annotaitons dict so we can modify it without # messing up other classes klass.__annotations__ = copy(klass.__annotations__) + klass.Task = task_type + klass.Outputs = outputs_klass # Now that we have saved the attributes in lists to be for arg in inputs: setattr( 
@@ -316,7 +379,7 @@ def make_interface( # Create class using attrs package, will create attributes for all columns and # parameters - attrs_klass = attrs.define(auto_attribs=False)(klass) + attrs_klass = attrs.define(auto_attribs=False, kw_only=True)(klass) return attrs_klass @@ -326,27 +389,34 @@ def get_converter(field: Field, interface_name: str): type_checker = TypeParser[field.type]( field.type, label=checker_label, superclass_auto_cast=True ) + converters = [] if field.type in (MultiInputObj, MultiInputFile): - converter = attrs.converters.pipe(ensure_list, type_checker) + converters.append(ensure_list) elif field.type in (MultiOutputObj, MultiOutputFile): - converter = attrs.converters.pipe(from_list_if_single, type_checker) + converters.append(from_list_if_single) + if field.converter: + converters.append(field.converter) + if converters: + converters.append(type_checker) + converter = attrs.converters.pipe(*converters) else: converter = type_checker return converter def get_validator(field: Field, interface_name: str): + validators = [] if field.allowed_values: - if field._validator is None: - field._validator = allowed_values_validator - elif isinstance(field._validator, ty.Iterable): - if allowed_values_validator not in field._validator: - field._validator.append(allowed_values_validator) - elif field._validator is not allowed_values_validator: - field._validator = [ - field._validator, - allowed_values_validator, - ] + validators.append(allowed_values_validator) + if isinstance(field.validator, ty.Iterable): + validators.extend(field.validator) + elif field.validator: + validators.append(field.validator) + if len(validators) > 1: + return validators + elif validators: + return validators[0] + return None def allowed_values_validator(_, attribute, value): @@ -528,7 +598,7 @@ def split_block(string: str) -> ty.Generator[str, None, None]: yield block.strip() -def fields(interface: Interface) -> list[Field]: +def list_fields(interface: Interface) -> list[Field]: return [ f.metadata[PYDRA_ATTR_METADATA] for f in attrs.fields(interface) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 47fd162116..0657da46ed 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -2,422 +2,652 @@ from __future__ import annotations import typing as ty +import re +from collections import defaultdict +import inspect +from copy import copy import attrs -from .python import arg as python_arg - -# import os -import pydra.engine.specs +from fileformats.core import from_mime +from fileformats import generic, field +from fileformats.core.exceptions import FormatRecognitionError +from .base import ( + Arg, + Out, + check_explicit_fields_are_none, + get_fields_from_class, + collate_fields, + Interface, + make_interface, +) +from pydra.engine.task import ShellCommandTask @attrs.define(kw_only=True) -class arg(python_arg): - argstr: str = None - position: int = None - sep: str = None - allowed_values: list = None - container_path: bool = False - output_file_template: str = None - output_field_name: str = None - keep_extension: bool = True - readonly: bool = False - formatter: ty.Callable = None - - -# def arg( -# help_string: str, -# default: ty.Any = attrs.NOTHING, -# argstr: str = None, -# position: int = None, -# mandatory: bool = False, -# sep: str = None, -# allowed_values: list = None, -# requires: list = None, -# xor: list = None, -# copyfile: bool = None, -# container_path: bool = False, -# output_file_template: str = None, -# output_field_name: str = None, -# keep_extension: bool 
= True, -# readonly: bool = False, -# formatter: ty.Callable = None, -# **kwargs, -# ): -# """ -# Returns an attrs field with appropriate metadata for it to be added as an argument in -# a Pydra shell command task definition +class arg(Arg): + """An input field that specifies a command line argument -# Parameters -# ------------ -# help_string: str -# A short description of the input field. -# default : Any, optional -# the default value for the argument -# argstr: str, optional -# A flag or string that is used in the command before the value, e.g. -v or -# -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, -# the flag is used before every element if a list is provided as a value. If no -# argstr is used the field is not part of the command. -# position: int, optional -# Position of the field in the command, could be nonnegative or negative integer. -# If nothing is provided the field will be inserted between all fields with -# nonnegative positions and fields with negative positions. -# mandatory: bool, optional -# If True user has to provide a value for the field, by default it is False -# sep: str, optional -# A separator if a list is provided as a value. -# allowed_values: list, optional -# List of allowed values for the field. -# requires: list, optional -# List of field names that are required together with the field. -# xor: list, optional -# List of field names that are mutually exclusive with the field. -# copyfile: bool, optional -# If True, a hard link is created for the input file in the output directory. If -# hard link not possible, the file is copied to the output directory, by default -# it is False -# container_path: bool, optional -# If True a path will be consider as a path inside the container (and not as a -# local path, by default it is False -# output_file_template: str, optional -# If provided, the field is treated also as an output field and it is added to -# the output spec. The template can use other fields, e.g. {file1}. Used in order -# to create an output specification. -# output_field_name: str, optional -# If provided the field is added to the output spec with changed name. Used in -# order to create an output specification. Used together with output_file_template -# keep_extension: bool, optional -# A flag that specifies if the file extension should be removed from the field value. -# Used in order to create an output specification, by default it is True -# readonly: bool, optional -# If True the input field can’t be provided by the user but it aggregates other -# input fields (for example the fields with argstr: -o {fldA} {fldB}), by default -# it is False -# formatter: function, optional -# If provided the argstr of the field is created using the function. This function -# can for example be used to combine several inputs into one command argument. The -# function can take field (this input field will be passed to the function), -# inputs (entire inputs will be passed) or any input field name (a specific input -# field will be sent). 
-# **kwargs -# remaining keyword arguments are passed onto the underlying attrs.field function -# """ - -# metadata = { -# "help_string": help_string, -# "argstr": argstr, -# "position": position, -# "mandatory": mandatory, -# "sep": sep, -# "allowed_values": allowed_values, -# "requires": requires, -# "xor": xor, -# "copyfile": copyfile, -# "container_path": container_path, -# "output_file_template": output_file_template, -# "output_field_name": output_field_name, -# "keep_extension": keep_extension, -# "readonly": readonly, -# "formatter": formatter, -# } + Parameters + ---------- + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + List of field names that are required together with the field. + xor: list, optional + List of field names that are mutually exclusive with the field. + copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + argstr: str, optional + A flag or string that is used in the command before the value, e.g. -v or + -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, + the flag is used before every element if a list is provided as a value. If no + argstr is used the field is not part of the command. + position: int, optional + Position of the field in the command, could be nonnegative or negative integer. + If nothing is provided the field will be inserted between all fields with + nonnegative positions and fields with negative positions. + sep: str, optional + A separator if a list is provided as a value. + container_path: bool, optional + If True a path will be consider as a path inside the container (and not as a + local path, by default it is False + formatter: function, optional + If provided the argstr of the field is created using the function. This function + can for example be used to combine several inputs into one command argument. The + function can take field (this input field will be passed to the function), + inputs (entire inputs will be passed) or any input field name (a specific input + field will be sent). 
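(Editor's illustration, not part of the patch.) A trimmed-down sketch of how a few of these options combine in a static interface definition, mirroring the `ls` fixture in the tests further down; the field names, types and help strings are taken from there, while the surrounding class and imports are assumed:

```
from fileformats.generic import Directory
from pydra.design import shell


@shell.interface
class Ls:
    executable = "ls"

    directory: Directory = shell.arg(
        help_string="the directory to list the contents of",
        argstr="",     # empty flag: only the value itself is placed on the command line
        mandatory=True,
        position=-1,   # negative position: rendered after the non-negative/flag fields
    )
    hidden: bool = shell.arg(
        help_string="display hidden FS objects",
        argstr="-a",   # boolean flag: added to the command only when the value is True
    )
```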
+ """ -# return attrs.field( -# default=default, -# metadata={k: v for k, v in metadata.items() if v is not None}, -# **kwargs, -# ) + argstr: str | None = None + position: int | None = None + sep: str | None = None + allowed_values: list | None = None + container_path: bool = False + formatter: ty.Callable | None = None -def out( - help_string: str, - mandatory: bool = False, - output_file_template: str = None, - output_field_name: str = None, - keep_extension: bool = True, - requires: list = None, - callable: ty.Callable = None, - **kwargs, -): - """Returns an attrs field with appropriate metadata for it to be added as an output of - a Pydra shell command task definition +@attrs.define(kw_only=True) +class out(Out): + """An output field that specifies a command line argument Parameters ---------- - help_string: str - A short description of the input field. The same as in input_spec. - mandatory: bool, default: False - If True the output file has to exist, otherwise an error will be raised. - output_file_template: str, optional - If provided the output file name (or list of file names) is created using the - template. The template can use other fields, e.g. {file1}. The same as in - input_spec. - output_field_name: str, optional - If provided the field is added to the output spec with changed name. The same as - in input_spec. Used together with output_file_template - keep_extension: bool, default: True - A flag that specifies if the file extension should be removed from the field - value. The same as in input_spec. - requires: list - List of field names that are required to create a specific output. The fields - do not have to be a part of the output_file_template and if any field from the - list is not provided in the input, a NOTHING is returned for the specific output. - This has a different meaning than the requires form the input_spec. - callable: Callable + callable : Callable, optional If provided the output file name (or list of file names) is created using the function. The function can take field (the specific output field will be passed to the function), output_dir (task output_dir will be used), stdout, stderr (stdout and stderr of the task will be sent) inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). 
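(Editor's illustration, not part of the patch.) A matching sketch of an output populated by a callable, following the `entries`/`list_entries` pattern used in the tests below. The `Outputs` block is meant to sit nested inside a `@shell.interface`-decorated class, and the body of `list_entries` is an assumption here, since the real helper in the test module is not shown in this diff:

```
from pydra.design import shell


def list_entries(stdout):
    # assumed implementation: one entry per line of the captured stdout
    return stdout.strip().split("\n")


class Outputs:
    entries: list = shell.out(
        help_string="list of entries returned by ls command",
        callable=list_entries,  # receives the task's captured stdout and builds the value
    )
```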
- **kwargs - remaining keyword arguments are passed onto the underlying attrs.field function """ - metadata = { - "help_string": help_string, - "mandatory": mandatory, - "output_file_template": output_file_template, - "output_field_name": output_field_name, - "keep_extension": keep_extension, - "requires": requires, - "callable": callable, - } - return attrs.field( - metadata={k: v for k, v in metadata.items() if v is not None}, **kwargs - ) + callable: ty.Callable | None = None -def interface( - klass_or_name: ty.Union[type, str], - executable: ty.Optional[str] = None, - input_fields: ty.Optional[dict[str, dict]] = None, - output_fields: ty.Optional[dict[str, dict]] = None, - bases: ty.Optional[list[type]] = None, - inputs_bases: ty.Optional[list[type]] = None, - outputs_bases: ty.Optional[list[type]] = None, -) -> type: - """ - Construct an analysis class and validate all the components fit together +@attrs.define(kw_only=True) +class outarg(Out, arg): + """An input field that specifies where to save the output file Parameters ---------- - klass_or_name : type or str - Either the class decorated by the @shell_task decorator or the name for a - dynamically generated class - executable : str, optional - If dynamically constructing a class (instead of decorating an existing one) the - name of the executable to run is provided - input_fields : dict[str, dict], optional - If dynamically constructing a class (instead of decorating an existing one) the - input fields can be provided as a dictionary of dictionaries, where the keys - are the name of the fields and the dictionary contents are passed as keyword - args to cmd_arg, with the exception of "type", which is used as the type annotation - of the field. - output_fields : dict[str, dict], optional - If dynamically constructing a class (instead of decorating an existing one) the - output fields can be provided as a dictionary of dictionaries, where the keys - are the name of the fields and the dictionary contents are passed as keyword - args to cmd_out, with the exception of "type", which is used as the type annotation - of the field. - bases : list[type] - Base classes for dynamically constructed shell command classes - inputs_bases : list[type] - Base classes for the input spec of dynamically constructed shell command classes - outputs_bases : list[type] - Base classes for the input spec of dynamically constructed shell command classes + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + mandatory: bool, optional + If True user has to provide a value for the field, by default it is False + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + List of field names that are required together with the field. + xor: list, optional + List of field names that are mutually exclusive with the field. 
+ copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + argstr: str, optional + A flag or string that is used in the command before the value, e.g. -v or + -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, + the flag is used before every element if a list is provided as a value. If no + argstr is used the field is not part of the command. + position: int, optional + Position of the field in the command, could be nonnegative or negative integer. + If nothing is provided the field will be inserted between all fields with + nonnegative positions and fields with negative positions. + sep: str, optional + A separator if a list is provided as a value. + container_path: bool, optional + If True a path will be consider as a path inside the container (and not as a + local path, by default it is False + formatter: function, optional + If provided the argstr of the field is created using the function. This function + can for example be used to combine several inputs into one command argument. The + function can take field (this input field will be passed to the function), + inputs (entire inputs will be passed) or any input field name (a specific input + field will be sent). + file_template: str, optional + If provided, the field is treated also as an output field and it is added to + the output spec. The template can use other fields, e.g. {file1}. Used in order + to create an output specification. + template_field: str, optional + If provided the field is added to the output spec with changed name. Used in + order to create an output specification. 
Used together with output_file_template - Returns - ------- - type - the shell command task class """ - annotations = { - "executable": str, - "Outputs": type, - } - dct = {"__annotations__": annotations} - - if isinstance(klass_or_name, str): - # Dynamically created classes using shell_task as a function - name = klass_or_name - - if executable is not None: - dct["executable"] = executable - if input_fields is None: - input_fields = {} - if output_fields is None: - output_fields = {} - bases = list(bases) if bases is not None else [] - inputs_bases = list(inputs_bases) if inputs_bases is not None else [] - outputs_bases = list(outputs_bases) if outputs_bases is not None else [] - - # Ensure base classes included somewhere in MRO - def ensure_base_included(base_class: type, bases_list: list[type]): - if not any(issubclass(b, base_class) for b in bases_list): - bases_list.append(base_class) - - # Get inputs and outputs bases from base class if not explicitly provided - for base in bases: - if not inputs_bases: - try: - inputs_bases = [base.Inputs] - except AttributeError: - pass - if not outputs_bases: - try: - outputs_bases = [base.Outputs] - except AttributeError: - pass - - # Ensure bases are lists and can be modified - ensure_base_included(pydra.engine.task.ShellCommandTask, bases) - ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) - ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) - - def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): - annotations = {} - attrs_dict = {"__annotations__": annotations} - for name, dct in fields.items(): - kwargs = dict(dct) # copy to avoid modifying input to outer function - annotations[name] = kwargs.pop("type") - attrs_dict[name] = attrs_func(**kwargs) - return attrs_dict - - Inputs = attrs.define(kw_only=True, slots=False)( - type( - "Inputs", - tuple(inputs_bases), - convert_to_attrs(input_fields, arg), - ) - ) + file_template: str | None = None + template_field: str | None = None - Outputs = attrs.define(kw_only=True, slots=False)( - type( - "Outputs", - tuple(outputs_bases), - convert_to_attrs(output_fields, out), - ) - ) - else: - # Statically defined classes using shell_task as decorator - if ( - executable, - input_fields, - output_fields, - bases, - inputs_bases, - outputs_bases, - ) != (None, None, None, None, None, None): - raise RuntimeError( - "When used as a decorator on a class, `shell_task` should not be " - "provided any other arguments" - ) - klass = klass_or_name - name = klass.__name__ +def interface( + wrapped: type | str | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + auto_attribs: bool = True, + args_last: bool = False, + name: str | None = None, +) -> Interface: + """Create a shell command interface - bases = [klass] - if not issubclass(klass, pydra.engine.task.ShellCommandTask): - bases.append(pydra.engine.task.ShellCommandTask) + Parameters + ---------- + wrapped : type | str | None + The class or command line template to create an interface for + inputs : list[str | Arg] | dict[str, Arg | type] | None + The input fields of the shell command + outputs : list[str | Out] | dict[str, Out | type] | type | None + The output fields of the shell command + auto_attribs : bool + Whether to use auto_attribs mode when creating the class + args_last : bool + Whether to put the executable argument last in the command line instead of first + as they appear in the template + 
name: str | None + The name of the returned class + """ - try: + def make( + wrapped: ty.Callable | type | None = None, + ) -> Interface: + + if inspect.isclass(wrapped): + klass = wrapped executable = klass.executable - except AttributeError: - raise RuntimeError( - "Classes decorated by `shell_task` should contain an `executable` " - "attribute specifying the shell tool to run" + class_name = klass.__name__ if not name else name + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = get_fields_from_class( + klass, arg, out, auto_attribs ) - try: - Inputs = klass.Inputs - except AttributeError: - raise RuntimeError( - "Classes decorated by `shell_task` should contain an `Inputs` class " - "attribute specifying the inputs to the shell tool" + else: + if not isinstance(wrapped, str): + raise ValueError( + f"wrapped must be a class or a string, not {wrapped!r}" + ) + klass = None + input_helps, output_helps = {}, {} + + executable, inferred_inputs, inferred_outputs = parse_command_line_template( + wrapped, + args_last=args_last, + inputs=inputs, + outputs=outputs, ) - try: - Outputs = klass.Outputs - except AttributeError: - Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) - - # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. - # not in a base class) - if "Inputs" in klass.__dict__: - Inputs = attrs.define(kw_only=True, slots=False)(Inputs) - if "Outputs" in klass.__dict__: - Outputs = attrs.define(kw_only=True, slots=False)(Outputs) - - if not issubclass(Inputs, pydra.engine.specs.ShellSpec): - Inputs = attrs.define(kw_only=True, slots=False)( - type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) - ) - - template_fields = _gen_output_template_fields(Inputs, Outputs) - - if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): - outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) - add_base_class = True - else: - outputs_bases = (Outputs,) - add_base_class = False + parsed_inputs, parsed_outputs = collate_fields( + arg_type=arg, + out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + class_name = executable if not name else name - if add_base_class or template_fields: - Outputs = attrs.define(kw_only=True, slots=False)( - type("Outputs", outputs_bases, template_fields) + parsed_inputs.append( + arg(name="executable", type=str, argstr="", position=0, default=executable) ) + # Copy the outputs into the inputs if they are outargs + parsed_inputs.extend(o for o in parsed_outputs if isinstance(o, arg)) + + # Check position values are unique + positions = defaultdict(list) + for inpt in parsed_inputs: + positions[inpt.position].append(inpt.name) + if multiple_positions := {k: v for k, v in positions.items() if len(v) > 1}: + raise ValueError( + f"Multiple fields have the same position: {multiple_positions}" + ) - dct["Inputs"] = Inputs - dct["Outputs"] = Outputs - - task_klass = type(name, tuple(bases), dct) - - if not hasattr(task_klass, "executable"): - raise RuntimeError( - "Classes generated by `shell_task` should contain an `executable` " - "attribute specifying the shell tool to run" + interface = make_interface( + ShellCommandTask, + parsed_inputs, + parsed_outputs, + name=class_name, + klass=klass, ) - - task_klass.input_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) - ) - task_klass.output_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Outputs", fields=[], 
bases=(task_klass.Outputs,) - ) - - return task_klass - - -def _gen_output_template_fields(Inputs: type, Outputs: type) -> dict: - """Auto-generates output fields for inputs that specify an 'output_file_template' + return interface + + if wrapped is not None: + if not isinstance(wrapped, (type, str)): + raise ValueError(f"wrapped must be a class or a string, not {wrapped!r}") + return make(wrapped) + return make + + +def parse_command_line_template( + template: str, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + args_last: bool = False, +) -> ty.Tuple[str, dict[str, Arg | type], dict[str, Out | type]]: + """Parses a command line template into a name and input and output fields. Fields + are inferred from the template if not provided, where inputs are specified with `` + and outputs with ``. The types of the fields can be specified using their + MIME like (see fileformats.core.from_mime), e.g. + + ``` + my_command + ``` + + The template can also specify options with `-` or `--` + followed by the option name and arguments with ``. The type is optional and + will default to `generic/fs-object` if not provided for arguments and `field/text` for + options. The file-formats namespace can be dropped for generic and field formats, e.g. + + ``` + another-command --output + ``` Parameters ---------- - Inputs : type - Inputs specification class - Outputs : type - Outputs specification class + template : str + The command line template + inputs : list[str | Arg] | dict[str, Arg | type] | None + The input fields of the shell command + outputs : list[str | Out] | dict[str, Out | type] | type | None + The output fields of the shell command + args_last : bool + Whether to put the executable argument last in the command line instead of first + as they appear in the template Returns ------- - template_fields: dict[str, attrs._make_CountingAttribute] - the template fields to add to the output spec + executable : str + The name of the command line template + inputs : dict + The input fields of the command line template + outputs : dict + The output fields of the command line template """ - annotations = {} - template_fields = {"__annotations__": annotations} - output_field_names = [f.name for f in attrs.fields(Outputs)] - for fld in attrs.fields(Inputs): - if "output_file_template" in fld.metadata: - if "output_field_name" in fld.metadata: - field_name = fld.metadata["output_field_name"] + if isinstance(inputs, list): + inputs = {arg.name: arg for arg in inputs} + elif isinstance(inputs, dict): + inputs = copy(inputs) # We don't want to modify the original + else: + inputs = {} + if isinstance(outputs, list): + outputs = {out.name: out for out in outputs} + elif isinstance(outputs, dict): + outputs = copy(outputs) # We don't want to modify the original + elif not outputs: + outputs = {} + parts = template.split(maxsplit=1) + if len(parts) == 1: + return template, inputs, outputs + executable, args_str = parts + tokens = re.split(r"\s+", args_str.strip()) + arg_re = re.compile(r"<([:a-zA-Z0-9\|\-\.\/\+]+)>") + opt_re = re.compile(r"--?(\w+)") + + arguments = [] + options = [] + option = None + position = 1 + + def merge_or_create_field(name, field_type, type): + """Merge the typing information with an existing field if it exists""" + if isinstance(field_type, out): + dct = outputs + else: + dct = inputs + try: + field = dct.pop(name) + except KeyError: + field = field_type(name=name, type=type_) + else: + if 
isinstance(field, dict): + field = field_type(**field) + elif not isinstance(field, field_type): # If field type is outarg not out + field = field_type(**attrs.asdict(field)) + field.name = name + if field.type is ty.Any: + field.type = type_ + + def add_option(opt): + name, field_type, type_ = opt + if len(type_) > 1: + type_ = tuple[tuple(type_)] + else: + type_ = type_[0] + options.append(merge_or_create_field(name, field_type, type_)) + + for token in tokens: + if match := arg_re.match(token): + name = match.group() + if name.startswith("out|"): + name = name[4:] + field_type = outarg else: - field_name = fld.name - # skip adding if the field already in the output_spec - exists_already = field_name in output_field_names - if not exists_already: - metadata = { - "help_string": fld.metadata["help_string"], - "mandatory": fld.metadata["mandatory"], - "keep_extension": fld.metadata["keep_extension"], - } - template_fields[field_name] = attrs.field(metadata=metadata) - annotations[field_name] = str - return template_fields + field_type = arg + if ":" in name: + name, type_str = name.split(":") + if "/" in type_str: + type_ = from_mime(type_str) + else: + try: + type_ = from_mime(f"field/{type_str}") + except FormatRecognitionError: + try: + type_ = from_mime(f"generic/{type_str}") + except FormatRecognitionError: + raise ValueError(f"Unknown type {type_str}") + else: + type_ = generic.FsObject if field_type is arg else field.Text + type_ = from_mime(type_str) if type_str is not None else ty.Any + if option is None: + arguments.append(merge_or_create_field(name, field_type, type_)) + position += 1 + else: + option[1].append((name, type_)) + elif match := opt_re.match(token): + if option is not None: + add_option(option) + position += 1 + option = (match.group(1), field_type, []) + if option is not None: + add_option(option) + + inferred_inputs = [] + inferred_outputs = [] + + all_args = options + arguments if args_last else arguments + options + + for i, argument in enumerate(all_args, start=1): + argument.position = i + if isinstance(argument, outarg): + inferred_outputs.append(argument) + else: + inferred_inputs.append(argument) + + return executable, inferred_inputs, inferred_outputs + + +# def interface( +# klass_or_name: ty.Union[type, str], +# executable: ty.Optional[str] = None, +# input_fields: ty.Optional[dict[str, dict]] = None, +# output_fields: ty.Optional[dict[str, dict]] = None, +# bases: ty.Optional[list[type]] = None, +# inputs_bases: ty.Optional[list[type]] = None, +# outputs_bases: ty.Optional[list[type]] = None, +# ) -> type: +# """ +# Construct an analysis class and validate all the components fit together + +# Parameters +# ---------- +# klass_or_name : type or str +# Either the class decorated by the @shell_task decorator or the name for a +# dynamically generated class +# executable : str, optional +# If dynamically constructing a class (instead of decorating an existing one) the +# name of the executable to run is provided +# input_fields : dict[str, dict], optional +# If dynamically constructing a class (instead of decorating an existing one) the +# input fields can be provided as a dictionary of dictionaries, where the keys +# are the name of the fields and the dictionary contents are passed as keyword +# args to cmd_arg, with the exception of "type", which is used as the type annotation +# of the field. 
+# output_fields : dict[str, dict], optional +# If dynamically constructing a class (instead of decorating an existing one) the +# output fields can be provided as a dictionary of dictionaries, where the keys +# are the name of the fields and the dictionary contents are passed as keyword +# args to cmd_out, with the exception of "type", which is used as the type annotation +# of the field. +# bases : list[type] +# Base classes for dynamically constructed shell command classes +# inputs_bases : list[type] +# Base classes for the input spec of dynamically constructed shell command classes +# outputs_bases : list[type] +# Base classes for the input spec of dynamically constructed shell command classes + +# Returns +# ------- +# type +# the shell command task class +# """ + +# annotations = { +# "executable": str, +# "Outputs": type, +# } +# dct = {"__annotations__": annotations} + +# if isinstance(klass_or_name, str): +# # Dynamically created classes using shell_task as a function +# name = klass_or_name + +# if executable is not None: +# dct["executable"] = executable +# if input_fields is None: +# input_fields = {} +# if output_fields is None: +# output_fields = {} +# bases = list(bases) if bases is not None else [] +# inputs_bases = list(inputs_bases) if inputs_bases is not None else [] +# outputs_bases = list(outputs_bases) if outputs_bases is not None else [] + +# # Ensure base classes included somewhere in MRO +# def ensure_base_included(base_class: type, bases_list: list[type]): +# if not any(issubclass(b, base_class) for b in bases_list): +# bases_list.append(base_class) + +# # Get inputs and outputs bases from base class if not explicitly provided +# for base in bases: +# if not inputs_bases: +# try: +# inputs_bases = [base.Inputs] +# except AttributeError: +# pass +# if not outputs_bases: +# try: +# outputs_bases = [base.Outputs] +# except AttributeError: +# pass + +# # Ensure bases are lists and can be modified +# ensure_base_included(pydra.engine.task.ShellCommandTask, bases) +# ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) +# ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) + +# def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): +# annotations = {} +# attrs_dict = {"__annotations__": annotations} +# for name, dct in fields.items(): +# kwargs = dict(dct) # copy to avoid modifying input to outer function +# annotations[name] = kwargs.pop("type") +# attrs_dict[name] = attrs_func(**kwargs) +# return attrs_dict + +# Inputs = attrs.define(kw_only=True, slots=False)( +# type( +# "Inputs", +# tuple(inputs_bases), +# convert_to_attrs(input_fields, arg), +# ) +# ) + +# Outputs = attrs.define(kw_only=True, slots=False)( +# type( +# "Outputs", +# tuple(outputs_bases), +# convert_to_attrs(output_fields, out), +# ) +# ) + +# else: +# # Statically defined classes using shell_task as decorator +# if ( +# executable, +# input_fields, +# output_fields, +# bases, +# inputs_bases, +# outputs_bases, +# ) != (None, None, None, None, None, None): +# raise RuntimeError( +# "When used as a decorator on a class, `shell_task` should not be " +# "provided any other arguments" +# ) +# klass = klass_or_name +# name = klass.__name__ + +# bases = [klass] +# if not issubclass(klass, pydra.engine.task.ShellCommandTask): +# bases.append(pydra.engine.task.ShellCommandTask) + +# try: +# executable = klass.executable +# except AttributeError: +# raise RuntimeError( +# "Classes decorated by `shell_task` should contain an `executable` " +# "attribute 
specifying the shell tool to run" +# ) +# try: +# Inputs = klass.Inputs +# except AttributeError: +# raise RuntimeError( +# "Classes decorated by `shell_task` should contain an `Inputs` class " +# "attribute specifying the inputs to the shell tool" +# ) + +# try: +# Outputs = klass.Outputs +# except AttributeError: +# Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) + +# # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. +# # not in a base class) +# if "Inputs" in klass.__dict__: +# Inputs = attrs.define(kw_only=True, slots=False)(Inputs) +# if "Outputs" in klass.__dict__: +# Outputs = attrs.define(kw_only=True, slots=False)(Outputs) + +# if not issubclass(Inputs, pydra.engine.specs.ShellSpec): +# Inputs = attrs.define(kw_only=True, slots=False)( +# type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) +# ) + +# template_fields = _gen_output_template_fields(Inputs, Outputs) + +# if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): +# outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) +# add_base_class = True +# else: +# outputs_bases = (Outputs,) +# add_base_class = False + +# if add_base_class or template_fields: +# Outputs = attrs.define(kw_only=True, slots=False)( +# type("Outputs", outputs_bases, template_fields) +# ) + +# dct["Inputs"] = Inputs +# dct["Outputs"] = Outputs + +# task_klass = type(name, tuple(bases), dct) + +# if not hasattr(task_klass, "executable"): +# raise RuntimeError( +# "Classes generated by `shell_task` should contain an `executable` " +# "attribute specifying the shell tool to run" +# ) + +# task_klass.input_spec = pydra.engine.specs.SpecInfo( +# name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) +# ) +# task_klass.output_spec = pydra.engine.specs.SpecInfo( +# name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) +# ) + +# return task_klass + + +# def _gen_output_template_fields(Inputs: type, Outputs: type) -> dict: +# """Auto-generates output fields for inputs that specify an 'output_file_template' + +# Parameters +# ---------- +# Inputs : type +# Inputs specification class +# Outputs : type +# Outputs specification class + +# Returns +# ------- +# template_fields: dict[str, attrs._make_CountingAttribute] +# the template fields to add to the output spec +# """ +# annotations = {} +# template_fields = {"__annotations__": annotations} +# output_field_names = [f.name for f in attrs.fields(Outputs)] +# for fld in attrs.fields(Inputs): +# if "output_file_template" in fld.metadata: +# if "output_field_name" in fld.metadata: +# field_name = fld.metadata["output_field_name"] +# else: +# field_name = fld.name +# # skip adding if the field already in the output_spec +# exists_already = field_name in output_field_names +# if not exists_already: +# metadata = { +# "help_string": fld.metadata["help_string"], +# "mandatory": fld.metadata["mandatory"], +# "keep_extension": fld.metadata["keep_extension"], +# } +# template_fields[field_name] = attrs.field(metadata=metadata) +# annotations[field_name] = str +# return template_fields diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 1b011246f2..ef175516ce 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -1,6 +1,7 @@ from operator import attrgetter import pytest from pydra import design +from decimal import Decimal from pydra.design.python import arg, out, interface from pydra.engine.task import FunctionTask @@ -13,17 +14,19 @@ def sample_interface(a: int) -> float: 
SampleInterface = interface( sample_interface, inputs={"a": arg(help_string="The argument to be doubled")}, - outputs={"b": out(help_string="the doubled output")}, + outputs={"b": out(help_string="the doubled output", type=Decimal)}, ) assert issubclass(SampleInterface, design.Interface) - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="The argument to be doubled"), ] assert outputs == [ - out(name="b", type=float, help_string="the doubled output"), + out(name="b", type=Decimal, help_string="the doubled output"), ] @@ -39,8 +42,10 @@ def sample_interface(a: int) -> int: ) assert issubclass(SampleInterface, design.Interface) - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [arg(name="a", type=float)] assert outputs == [out(name="b", type=float)] @@ -53,8 +58,10 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: assert issubclass(SampleInterface, design.Interface) assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int), arg(name="b", type=float), @@ -75,8 +82,10 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return c, d assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int), arg(name="b", type=float), @@ -101,8 +110,10 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="First input to be inputted"), arg(name="b", type=float, help_string="Second input"), @@ -131,8 +142,10 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + 
design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="First input to be inputted"), arg(name="b", type=float, help_string="Second input"), @@ -169,8 +182,10 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="First input to be inputted"), arg(name="b", type=float, help_string="Second input"), @@ -212,8 +227,10 @@ def function(a, b): assert issubclass(SampleInterface, design.Interface) assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="First input to be inputted"), arg(name="b", type=float, help_string="Second input"), @@ -275,8 +292,10 @@ def function(a, b): return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.fields(SampleInterface), key=attrgetter("name")) - outputs = sorted(design.fields(SampleInterface.Outputs), key=attrgetter("name")) + inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) + outputs = sorted( + design.list_fields(SampleInterface.Outputs), key=attrgetter("name") + ) assert inputs == [ arg(name="a", type=int, help_string="First input to be inputted"), arg(name="b", type=float, help_string="Second input"), diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index cca87179be..c07e02bd54 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -1,10 +1,12 @@ import os import tempfile -import attrs +from operator import attrgetter from pathlib import Path +import attrs import pytest import cloudpickle as cp -from pydra.design import shell, Interface +from pydra.design import shell, Interface, list_fields +from fileformats.generic import File, Directory def list_entries(stdout): @@ -21,10 +23,10 @@ def Ls(request): if request.param == "static": @shell.interface - class Ls: + class Ls(Interface["Ls.Outputs"]): executable = "ls" - directory: os.PathLike = shell.arg( + directory: Directory = shell.arg( help_string="the directory to list the contents of", argstr="", mandatory=True, @@ -71,23 +73,22 @@ class Outputs: ) elif request.param == "dynamic": - Ls = shell.task( - "Ls", - executable="ls", + Ls = shell.interface( + "ls", inputs={ - "directory": { - "type": os.PathLike, - "help_string": "the directory to list the contents of", - "argstr": "", - "mandatory": True, - "position": -1, - }, - "hidden": { - "type": bool, - "help_string": "display hidden FS objects", - "argstr": "-a", - }, - "long_format": { + "directory": shell.arg( + type=File, + help_string="the directory to list the contents of", + argstr="", + mandatory=True, + position=-1, + ), + "hidden": shell.arg( + type=bool, + help_string="display hidden FS objects", + argstr="-a", + ), + "long_format": { 
# Mix it up with a full dictionary based definition "type": bool, "help_string": ( "display properties of FS object, such as permissions, size and " @@ -95,34 +96,35 @@ class Outputs: ), "argstr": "-l", }, - "human_readable": { - "type": bool, - "help_string": "display file sizes in human readable form", - "argstr": "-h", - "requires": ["long_format"], - }, - "complete_date": { - "type": bool, - "help_string": "Show complete date in long format", - "argstr": "-T", - "requires": ["long_format"], - "xor": ["date_format_str"], - }, - "date_format_str": { - "type": str, - "help_string": "format string for ", - "argstr": "-D", - "requires": ["long_format"], - "xor": ["complete_date"], - }, + "human_readable": shell.arg( + type=bool, + help_string="display file sizes in human readable form", + argstr="-h", + requires=["long_format"], + ), + "complete_date": shell.arg( + type=bool, + help_string="Show complete date in long format", + argstr="-T", + requires=["long_format"], + xor=["date_format_str"], + ), + "date_format_str": shell.arg( + type=str, + help_string="format string for ", + argstr="-D", + requires=["long_format"], + xor=["complete_date"], + ), }, outputs={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } + "entries": shell.out( + type=list, + help_string="list of entries returned by ls command", + callable=list_entries, + ) }, + name="Ls", ) else: @@ -132,23 +134,19 @@ class Outputs: def test_shell_fields(Ls): - assert [a.name for a in attrs.fields(Ls.Inputs)] == [ - "executable", - "args", - "directory", - "hidden", - "long_format", - "human_readable", - "complete_date", - "date_format_str", - ] + assert sorted([a.name for a in list_fields(Ls)]) == sorted( + [ + "executable", + "directory", + "hidden", + "long_format", + "human_readable", + "complete_date", + "date_format_str", + ] + ) - assert [a.name for a in attrs.fields(Ls.Outputs)] == [ - "return_code", - "stdout", - "stderr", - "entries", - ] + assert [a.name for a in list_fields(Ls.Outputs)] == ["entries"] def test_shell_pickle_roundtrip(Ls, tmpdir): @@ -186,37 +184,37 @@ def test_shell_run(Ls, tmpdir): def A(request): if request.param == "static": - @shell_task + @shell.interface class A: executable = "cp" - class Inputs: - x: os.PathLike = shell_arg( - help_string="an input file", argstr="", position=0 - ) - y: str = shell_arg( + x: File = shell.arg(help_string="an input file", argstr="", position=0) + + class Outputs: + y: File = shell.outarg( help_string="path of output file", - output_file_template="{x}_out", - argstr="", + file_template="{x}_out", ) elif request.param == "dynamic": - A = shell_task( + A = shell.interface( "A", executable="cp", - input_fields={ - "x": { - "type": os.PathLike, - "help_string": "an input file", - "argstr": "", - "position": 0, - }, - "y": { - "type": str, - "help_string": "path of output file", - "argstr": "", - "output_file_template": "{x}_out", - }, + inputs={ + "x": shell.arg( + type=File, + help_string="an input file", + argstr="", + position=0, + ), + }, + outputs={ + "y": shell.outarg( + type=File, + help_string="path of output file", + argstr="", + output_file_template="{x}_out", + ), }, ) else: @@ -230,46 +228,86 @@ def test_shell_output_file_template(A): def test_shell_output_field_name_static(): - @shell_task + @shell.interface class A: + """Copy a file""" + executable = "cp" - class Inputs: - x: os.PathLike = shell_arg( - help_string="an input file", argstr="", position=0 - ) - y: str = shell_arg( - 
help_string="path of output file", - output_file_template="{x}_out", - output_field_name="y_out", + x: File = shell.arg(help_string="an input file", argstr="", position=1) + + class Outputs: + y: File = shell.outarg( + help_string="the output file", + file_template="{x}_out", argstr="", + position=-1, ) - assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + assert sorted([a.name for a in attrs.fields(A)]) == ["executable", "x", "y"] + assert [a.name for a in attrs.fields(A.Outputs)] == ["y"] + inputs = sorted(list_fields(A), key=attrgetter("name")) + outputs = sorted(list_fields(A.Outputs), key=attrgetter("name")) + assert inputs == [ + shell.arg( + name="executable", + default="cp", + type=str, + argstr="", + position=0, + ), + shell.arg( + name="x", + type=File, + help_string="an input file", + argstr="", + position=1, + ), + shell.outarg( + name="y", + type=File, + help_string="the output file", + file_template="{x}_out", + argstr="", + position=-1, + ), + ] + assert outputs == [ + shell.outarg( + name="y", + type=File, + help_string="the output file", + file_template="{x}_out", + argstr="", + position=-1, + ) + ] def test_shell_output_field_name_dynamic(): - A = shell_task( - "A", - executable="cp", - input_fields={ - "x": { - "type": os.PathLike, - "help_string": "an input file", - "argstr": "", - "position": 0, - }, - "y": { - "type": str, - "help_string": "path of output file", - "argstr": "", - "output_field_name": "y_out", - "output_file_template": "{x}_out", - }, + A = shell.interface( + "cp", + name="A", + inputs={ + "x": shell.arg( + type=File, + help_string="an input file", + argstr="", + position=1, + ), + }, + outputs={ + "y": shell.outarg( + type=File, + help_string="path of output file", + argstr="", + template_field="y_out", + file_template="{x}_out", + ), }, ) - assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] + assert "y" in [a.name for a in attrs.fields(A.Outputs)] def get_file_size(y: Path): @@ -278,9 +316,9 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmpdir): - B = shell_task( + B = shell.interface( "B", - output_fields={ + outputs={ "out_file_size": { "type": int, "help_string": "size of the output directory", @@ -303,10 +341,10 @@ def test_shell_bases_dynamic(A, tmpdir): def test_shell_bases_static(A, tmpdir): - @shell_task + @shell.interface class B(A): class Outputs: - out_file_size: int = shell_out( + out_file_size: int = shell.out( help_string="size of the output directory", callable=get_file_size ) @@ -323,38 +361,37 @@ class Outputs: def test_shell_inputs_outputs_bases_dynamic(tmpdir): - A = shell_task( - "A", + A = shell.interface( "ls", - input_fields={ - "directory": { - "type": os.PathLike, - "help_string": "input directory", - "argstr": "", - "position": -1, - } + name="A", + inputs={ + "directory": shell.arg( + type=File, + help_string="input directory", + argstr="", + position=-1, + ) }, - output_fields={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } + outputs={ + "entries": shell.out( + type=list, + help_string="list of entries returned by ls command", + callable=list_entries, + ) }, ) - B = shell_task( - "B", + B = shell.interface( "ls", - input_fields={ - "hidden": { - "type": bool, - "argstr": "-a", - "help_string": "show hidden files", - "default": False, - } + name="B", + inputs={ + "hidden": shell.arg( + type=bool, + argstr="-a", + help_string="show hidden files", + default=False, + ) }, bases=[A], - inputs_bases=[A.Inputs], ) 
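# Editor's aside (illustrative sketch, not part of the patch): the intent of
# bases=[A] above is that B inherits A's "directory" input and "entries" output
# while adding its own "hidden" flag, so that an instance built as
#     b = B(directory=tmpdir, hidden=True)
# is expected to render the command line "ls -a <tmpdir>" (directory placed last
# because of position=-1, the "-a" flag supplied by hidden's argstr), which is
# what this test goes on to assert.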
Path.touch(tmpdir / ".hidden") @@ -370,25 +407,25 @@ def test_shell_inputs_outputs_bases_dynamic(tmpdir): def test_shell_inputs_outputs_bases_static(tmpdir): - @shell_task + @shell.interface class A: executable = "ls" class Inputs: - directory: os.PathLike = shell_arg( + directory: Directory = shell.arg( help_string="input directory", argstr="", position=-1 ) class Outputs: - entries: list = shell_out( + entries: list = shell.out( help_string="list of entries returned by ls command", callable=list_entries, ) - @shell_task + @shell.interface class B(A): class Inputs(A.Inputs): - hidden: bool = shell_arg( + hidden: bool = shell.arg( help_string="show hidden files", argstr="-a", default=False, @@ -408,15 +445,14 @@ class Inputs(A.Inputs): def test_shell_missing_executable_static(): with pytest.raises(RuntimeError, match="should contain an `executable`"): - @shell_task + @shell.interface class A: - class Inputs: - directory: os.PathLike = shell_arg( - help_string="input directory", argstr="", position=-1 - ) + directory: Directory = shell.arg( + help_string="input directory", argstr="", position=-1 + ) class Outputs: - entries: list = shell_out( + entries: list = shell.out( help_string="list of entries returned by ls command", callable=list_entries, ) @@ -424,23 +460,23 @@ class Outputs: def test_shell_missing_executable_dynamic(): with pytest.raises(RuntimeError, match="should contain an `executable`"): - A = shell_task( + shell.interface( "A", executable=None, - input_fields={ - "directory": { - "type": os.PathLike, - "help_string": "input directory", - "argstr": "", - "position": -1, - } + inputs={ + "directory": shell.arg( + type=Directory, + help_string="input directory", + argstr="", + position=-1, + ), }, - output_fields={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } + outputs={ + "entries": shell.out( + type=list, + help_string="list of entries returned by ls command", + callable=list_entries, + ) }, ) @@ -448,12 +484,12 @@ def test_shell_missing_executable_dynamic(): def test_shell_missing_inputs_static(): with pytest.raises(RuntimeError, match="should contain an `Inputs`"): - @shell_task + @shell.interface class A: executable = "ls" class Outputs: - entries: list = shell_out( + entries: list = shell.out( help_string="list of entries returned by ls command", callable=list_entries, ) @@ -461,6 +497,7 @@ class Outputs: def test_shell_decorator_misuse(A): with pytest.raises( - RuntimeError, match=("`shell_task` should not be provided any other arguments") + RuntimeError, + match=("`shell.interface` should not be provided any other arguments"), ): - shell_task(A, executable="cp") + shell.interface(A, executable="cp") From 0aa12cf2760ea33d95bb36924841d52fa4a00986 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 12 Nov 2024 11:12:43 +1100 Subject: [PATCH 021/342] debugging shell interface syntax --- pydra/design/base.py | 14 ++++++++++-- pydra/design/python.py | 10 ++++++++- pydra/design/shell.py | 4 ++++ pydra/design/tests/test_shell.py | 38 +++++++++++++++----------------- 4 files changed, 43 insertions(+), 23 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index c9d96d4140..629113cd87 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -297,6 +297,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: def get_outputs_class(klass: type | None = None) -> type | None: + """Get the Outputs class from the nested "Outputs" class or from the Interface class + 
args""" if klass is None: return None try: @@ -319,14 +321,17 @@ def make_interface( outputs: list[Out], klass: type | None = None, name: str | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), ): assert isinstance(inputs, list) assert isinstance(outputs, list) if name is None and klass is not None: name = klass.__name__ outputs_klass = get_outputs_class(klass) + if outputs_klass is None: - outputs_klass = type("Outputs", (), {}) + outputs_klass = type("Outputs", tuple(outputs_bases), {}) else: # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes @@ -348,9 +353,14 @@ def make_interface( if klass is None or not issubclass(klass, Interface): if name is None: raise ValueError("name must be provided if klass is not") + bases = tuple(bases) + if not any(issubclass(b, Interface) for b in bases): + bases = bases + (Interface,) + if klass is not None: + bases += tuple(c for c in klass.__mro__ if c not in bases + (object,)) klass = types.new_class( name=name, - bases=(Interface[outputs_klass],), + bases=bases, kwds={}, exec_body=lambda ns: ns.update( {"Task": task_type, "Outputs": outputs_klass} diff --git a/pydra/design/python.py b/pydra/design/python.py index bda4b89363..2aa43949df 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -33,6 +33,8 @@ def interface( /, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, ) -> Interface: """ @@ -81,7 +83,13 @@ def make(wrapped: ty.Callable | type) -> Interface: output_helps=output_helps, ) interface = make_interface( - FunctionTask, parsed_inputs, parsed_outputs, name=name, klass=klass + FunctionTask, + parsed_inputs, + parsed_outputs, + name=name, + klass=klass, + bases=bases, + outputs_bases=outputs_bases, ) # Set the function in the created class interface.function = function diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 0657da46ed..76bb9c11cd 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -176,6 +176,8 @@ def interface( /, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, args_last: bool = False, name: str | None = None, @@ -257,6 +259,8 @@ def make( parsed_outputs, name=class_name, klass=klass, + bases=bases, + outputs_bases=outputs_bases, ) return interface diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index c07e02bd54..74fd8457a9 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -77,7 +77,7 @@ class Outputs: "ls", inputs={ "directory": shell.arg( - type=File, + type=Directory, help_string="the directory to list the contents of", argstr="", mandatory=True, @@ -366,7 +366,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmpdir): name="A", inputs={ "directory": shell.arg( - type=File, + type=Directory, help_string="input directory", argstr="", position=-1, @@ -394,16 +394,16 @@ def test_shell_inputs_outputs_bases_dynamic(tmpdir): bases=[A], ) - Path.touch(tmpdir / ".hidden") + hidden = File.sample(tmpdir, stem=".hidden") b = B(directory=tmpdir, hidden=True) - assert b.inputs.directory == tmpdir - assert b.inputs.hidden - assert b.cmdline == f"ls -a {tmpdir}" + assert 
b.directory == Directory(tmpdir) + assert b.hidden - result = b() - assert result.output.entries == [".", "..", ".hidden"] + # result = b() + # assert result.runner.cmdline == f"ls -a {tmpdir}" + # assert result.output.entries == [".", "..", ".hidden"] def test_shell_inputs_outputs_bases_static(tmpdir): @@ -411,10 +411,9 @@ def test_shell_inputs_outputs_bases_static(tmpdir): class A: executable = "ls" - class Inputs: - directory: Directory = shell.arg( - help_string="input directory", argstr="", position=-1 - ) + directory: Directory = shell.arg( + help_string="input directory", argstr="", position=-1 + ) class Outputs: entries: list = shell.out( @@ -424,12 +423,11 @@ class Outputs: @shell.interface class B(A): - class Inputs(A.Inputs): - hidden: bool = shell.arg( - help_string="show hidden files", - argstr="-a", - default=False, - ) + hidden: bool = shell.arg( + help_string="show hidden files", + argstr="-a", + default=False, + ) Path.touch(tmpdir / ".hidden") @@ -438,8 +436,8 @@ class Inputs(A.Inputs): assert b.inputs.directory == tmpdir assert b.inputs.hidden - result = b() - assert result.output.entries == [".", "..", ".hidden"] + # result = b() + # assert result.output.entries == [".", "..", ".hidden"] def test_shell_missing_executable_static(): From 8581f5550f9288a7dc5798bc1650f6eb1d36e5d2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 12 Nov 2024 12:54:04 +1100 Subject: [PATCH 022/342] debugging type hints --- pydra/design/base.py | 24 ++++++++++++++---------- pydra/design/tests/test_shell.py | 5 ++--- pydra/engine/__init__.py | 21 +++++++++++++++++++++ pydra/engine/core.py | 6 +++--- pydra/utils/tests/test_hash.py | 2 +- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 629113cd87..b16dc2d10a 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -448,10 +448,10 @@ def extract_inputs_and_outputs_from_function( """Extract input output types and output names from the function source if they aren't explicitly""" sig = inspect.signature(function) - input_types = { - p.name: (p.annotation if p.annotation is not inspect._empty else ty.Any) - for p in sig.parameters.values() - } + type_hints = ty.get_type_hints(function) + input_types = {} + for p in sig.parameters.values(): + input_types[p.name] = type_hints.get(p.name, ty.Any) if inputs: if not isinstance(inputs, dict): raise ValueError( @@ -468,9 +468,7 @@ def extract_inputs_and_outputs_from_function( inpt.type = type_ else: inputs = input_types - return_type = ( - sig.return_annotation if sig.return_annotation is not inspect._empty else ty.Any - ) + return_type = type_hints.get("return", ty.Any) if outputs is None: src = inspect.getsource(function).strip() return_lines = re.findall(r"\n\s+return .*$", src) @@ -481,9 +479,11 @@ def extract_inputs_and_outputs_from_function( .group(1) .split(",") ] - if all(re.match(r"^\w+$", o) for o in implicit_outputs): + if len(implicit_outputs) > 1 and all( + re.match(r"^\w+$", o) for o in implicit_outputs + ): outputs = implicit_outputs - if len(outputs) > 1: + if outputs and len(outputs) > 1: if return_type is not ty.Any: if ty.get_origin(return_type) is not tuple: raise ValueError( @@ -505,7 +505,11 @@ def extract_inputs_and_outputs_from_function( outputs = output_types elif outputs: - output_name, output = next(iter(outputs.items())) + if isinstance(outputs, dict): + output_name, output = next(iter(outputs.items())) + elif isinstance(outputs, list): + output_name = outputs[0] + output = ty.Any if isinstance(output, 
Out): if output.type is ty.Any: output.type = return_type diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 74fd8457a9..2e28010e70 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -457,7 +457,7 @@ class Outputs: def test_shell_missing_executable_dynamic(): - with pytest.raises(RuntimeError, match="should contain an `executable`"): + with pytest.raises(AttributeError, match="should contain an `executable`"): shell.interface( "A", executable=None, @@ -480,11 +480,10 @@ def test_shell_missing_executable_dynamic(): def test_shell_missing_inputs_static(): - with pytest.raises(RuntimeError, match="should contain an `Inputs`"): + with pytest.raises(AttributeError, match="should contain an `Inputs`"): @shell.interface class A: - executable = "ls" class Outputs: entries: list = shell.out( diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 3674736c83..c532bc7793 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -2,8 +2,29 @@ from .submitter import Submitter from .core import Workflow +import __main__ +import logging +from ._version import __version__ __all__ = [ "Submitter", "Workflow", + "logger", + "check_latest_version", ] + +logger = logging.getLogger("pydra") + + +def check_latest_version(): + import etelemetry + + return etelemetry.check_available_version("nipype/pydra", __version__, lgr=logger) + + +# Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL +if not hasattr(__main__, "__file__"): + from .engine.core import TaskBase + + if TaskBase._etelemetry_version_data is None: + TaskBase._etelemetry_version_data = check_latest_version() diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c0fb2ad17e..c8b0089cbe 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -133,7 +133,7 @@ def __init__( messengers : TODO """ - from .. import check_latest_version + from . 
import check_latest_version if Task._etelemetry_version_data is None: Task._etelemetry_version_data = check_latest_version() @@ -150,13 +150,13 @@ def __init__( **{ # in attrs names that starts with "_" could be set when name provided w/o "_" (f.name[1:] if f.name.startswith("_") else f.name): f.default - for f in attr.fields(self.interface) + for f in attr.fields(type(self.interface)) } ) self.input_names = [ field.name - for field in attr.fields(self.interface) + for field in attr.fields(type(self.interface)) if field.name not in ["_func", "_graph_checksums"] ] diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index de065a03de..f81b5aedec 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -423,7 +423,7 @@ def __repr__(self): with pytest.raises( TypeError, match=( - "unhashable\nand therefore cannot hash `A\(\)` of type " + "unhashable\nand therefore cannot hash `A()` of type " "`pydra.utils.tests.test_hash.A`" ), ): From 4b408a3e4e160cb7a020f35771576b9ed9c269c2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 15 Nov 2024 10:56:25 +1100 Subject: [PATCH 023/342] debugged test_shell unittests --- pydra/design/base.py | 39 ++++++------ pydra/design/shell.py | 105 ++++++++++++++++++++++++++----- pydra/design/tests/test_shell.py | 92 ++++++++++++++------------- 3 files changed, 154 insertions(+), 82 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index b16dc2d10a..24aaf35803 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -262,8 +262,11 @@ def get_fields_from_class( def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: """Get the fields from a class""" fields_dict = {} + # Get fields defined in base classes if present + for field in list_fields(klass): + fields_dict[field.name] = field for atr_name in dir(klass): - if atr_name.startswith("__"): + if atr_name in fields_dict or atr_name.startswith("__"): continue try: atr = getattr(klass, atr_name) @@ -328,26 +331,18 @@ def make_interface( assert isinstance(outputs, list) if name is None and klass is not None: name = klass.__name__ - outputs_klass = get_outputs_class(klass) - - if outputs_klass is None: - outputs_klass = type("Outputs", tuple(outputs_bases), {}) - else: - # Ensure that the class has it's own annotations dict so we can modify it without - # messing up other classes - outputs_klass.__annotations__ = copy(outputs_klass.__annotations__) - # Now that we have saved the attributes in lists to be - for out in outputs: - setattr( - outputs_klass, - out.name, - attrs.field( - converter=get_converter(out, outputs_klass.__name__), - metadata={PYDRA_ATTR_METADATA: out}, - on_setattr=attrs.setters.convert, - ), - ) - outputs_klass.__annotations__[out.name] = out.type + outputs_klass = type( + "Outputs", + tuple(outputs_bases), + { + o.name: attrs.field( + converter=get_converter(o, f"{name}.Outputs"), + metadata={PYDRA_ATTR_METADATA: o}, + ) + for o in outputs + }, + ) + outputs_klass.__annotations__.update((o.name, o.type) for o in outputs) outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) if klass is None or not issubclass(klass, Interface): @@ -613,6 +608,8 @@ def split_block(string: str) -> ty.Generator[str, None, None]: def list_fields(interface: Interface) -> list[Field]: + if not attrs.has(interface): + return [] return [ f.metadata[PYDRA_ATTR_METADATA] for f in attrs.fields(interface) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 76bb9c11cd..4f816b0df0 100644 --- 
a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -58,9 +58,10 @@ class arg(Arg): from name to field, by default it is None argstr: str, optional A flag or string that is used in the command before the value, e.g. -v or - -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, - the flag is used before every element if a list is provided as a value. If no - argstr is used the field is not part of the command. + -v {inp_field}, but it could be and empty string, “”, in which case the value is + just printed to the command line. If … are used, e.g. -v…, + the flag is used before every element if a list is provided as a value. If the + argstr is None, the field is not part of the command. position: int, optional Position of the field in the command, could be nonnegative or negative integer. If nothing is provided the field will be inserted between all fields with @@ -78,7 +79,7 @@ class arg(Arg): field will be sent). """ - argstr: str | None = None + argstr: str | None = "" position: int | None = None sep: str | None = None allowed_values: list | None = None @@ -207,8 +208,18 @@ def make( if inspect.isclass(wrapped): klass = wrapped - executable = klass.executable - class_name = klass.__name__ if not name else name + executable: str + try: + executable = attrs.fields(klass).executable.default + except (AttributeError, attrs.exceptions.NotAnAttrsClassError): + try: + executable = klass.executable + except AttributeError: + raise AttributeError( + f"Shell task class {wrapped} must have an `executable` " + "attribute that specifies the command to run" + ) from None + class_name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) parsed_inputs, parsed_outputs = get_fields_from_class( klass, arg, out, auto_attribs @@ -238,20 +249,20 @@ def make( ) class_name = executable if not name else name - parsed_inputs.append( - arg(name="executable", type=str, argstr="", position=0, default=executable) + # Update the inputs (overriding inputs from base classes) with the executable + # and the output argument fields + inputs_dict = {i.name: i for i in parsed_inputs} + inputs_dict.update({o.name: o for o in parsed_outputs if isinstance(o, arg)}) + inputs_dict["executable"] = arg( + name="executable", type=str, argstr="", position=0, default=executable ) - # Copy the outputs into the inputs if they are outargs - parsed_inputs.extend(o for o in parsed_outputs if isinstance(o, arg)) + parsed_inputs = list(inputs_dict.values()) - # Check position values are unique - positions = defaultdict(list) + # Set positions for the remaining inputs that don't have an explicit position + position_stack = list(reversed(remaining_positions(parsed_inputs))) for inpt in parsed_inputs: - positions[inpt.position].append(inpt.name) - if multiple_positions := {k: v for k, v in positions.items() if len(v) > 1}: - raise ValueError( - f"Multiple fields have the same position: {multiple_positions}" - ) + if inpt.position is None: + inpt.position = position_stack.pop() interface = make_interface( ShellCommandTask, @@ -264,6 +275,27 @@ def make( ) return interface + # If a name is provided (and hence not being used as a decorator), check to see if + # we are extending from a class that already defines an executable + if wrapped is None and name is not None: + for base in bases: + try: + wrapped = attrs.fields(base).executable.default + except (AttributeError, attrs.exceptions.NotAnAttrsClassError): + try: + wrapped = base.executable + except AttributeError: + pass + if wrapped: + break + if 
wrapped is None: + raise ValueError( + f"name ({name!r}) can only be provided when creating a class " + "dynamically, i.e. not using it as a decorator. Check to see " + "whether you have forgotten to provide the command line template" + ) + # If wrapped is provided (i.e. this is not being used as a decorator), return the + # interface class if wrapped is not None: if not isinstance(wrapped, (type, str)): raise ValueError(f"wrapped must be a class or a string, not {wrapped!r}") @@ -419,6 +451,45 @@ def add_option(opt): return executable, inferred_inputs, inferred_outputs +def remaining_positions(args: list[Arg], num_args: int | None = None) -> ty.List[int]: + """Get the remaining positions for input fields + + Parameters + ---------- + args : list[Arg] + The list of input fields + num_args : int, optional + The number of arguments, by default it is the length of the args + + Returns + ------- + list[int] + The list of remaining positions + + Raises + ------ + ValueError + If multiple fields have the same position + """ + if num_args is None: + num_args = len(args) + # Check for multiple positions + positions = defaultdict(list) + for arg in args: + if arg.position is not None: + if arg.position >= 0: + positions[arg.position].append(arg) + else: + positions[num_args + arg.position].append(arg) + if multiple_positions := { + k: f"{v.name}({v.position})" for k, v in positions.items() if len(v) > 1 + }: + raise ValueError( + f"Multiple fields have the overlapping positions: {multiple_positions}" + ) + return [i for i in range(num_args) if i not in positions] + + # def interface( # klass_or_name: ty.Union[type, str], # executable: ty.Optional[str] = None, diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 2e28010e70..97fb786ced 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -160,6 +160,7 @@ def test_shell_pickle_roundtrip(Ls, tmpdir): assert RereadLs is Ls +@pytest.mark.xfail(reason="Still need to update tasks to use new shell interface") def test_shell_run(Ls, tmpdir): Path.touch(tmpdir / "a") Path.touch(tmpdir / "b") @@ -186,26 +187,37 @@ def A(request): @shell.interface class A: + """An example shell interface described in a class + + Parameters + ---------- + x : File + an input file + """ + executable = "cp" - x: File = shell.arg(help_string="an input file", argstr="", position=0) + x: File = shell.arg(argstr="", position=1) class Outputs: - y: File = shell.outarg( - help_string="path of output file", - file_template="{x}_out", - ) + """The outputs of the example shell interface + + Parameters + ---------- + y : File + path of output file""" + + y: File = shell.outarg(file_template="{x}_out", position=-1) elif request.param == "dynamic": A = shell.interface( - "A", - executable="cp", + "cp", inputs={ "x": shell.arg( type=File, help_string="an input file", argstr="", - position=0, + position=1, ), }, outputs={ @@ -213,9 +225,10 @@ class Outputs: type=File, help_string="path of output file", argstr="", - output_file_template="{x}_out", + file_template="{x}_out", ), }, + name="A", ) else: assert False @@ -317,7 +330,10 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmpdir): B = shell.interface( - "B", + name="B", + inputs={ + "y": shell.arg(type=File, help_string="output file", argstr="", position=-1) + }, outputs={ "out_file_size": { "type": int, @@ -331,18 +347,23 @@ def test_shell_bases_dynamic(A, tmpdir): xpath = tmpdir / "x.txt" ypath = tmpdir / "y.txt" Path.touch(xpath) + Path.touch(ypath) - b = 
B(x=xpath, y=str(ypath)) + b = B(x=xpath, y=ypath) - result = b() + assert b.x == File(xpath) + assert b.y == File(ypath) - assert b.inputs.x == xpath - assert result.output.y == str(ypath) + # result = b() + # assert result.output.y == str(ypath) def test_shell_bases_static(A, tmpdir): @shell.interface class B(A): + + y: File + class Outputs: out_file_size: int = shell.out( help_string="size of the output directory", callable=get_file_size @@ -351,13 +372,15 @@ class Outputs: xpath = tmpdir / "x.txt" ypath = tmpdir / "y.txt" Path.touch(xpath) + Path.touch(ypath) b = B(x=xpath, y=str(ypath)) - result = b() + assert b.x == File(xpath) + assert b.y == File(ypath) - assert b.inputs.x == xpath - assert result.output.y == str(ypath) + # result = b() + # assert result.output.y == str(ypath) def test_shell_inputs_outputs_bases_dynamic(tmpdir): @@ -433,15 +456,15 @@ class B(A): b = B(directory=tmpdir, hidden=True) - assert b.inputs.directory == tmpdir - assert b.inputs.hidden + assert b.directory == Directory(tmpdir) + assert b.hidden # result = b() # assert result.output.entries == [".", "..", ".hidden"] def test_shell_missing_executable_static(): - with pytest.raises(RuntimeError, match="should contain an `executable`"): + with pytest.raises(AttributeError, match="must have an `executable` attribute"): @shell.interface class A: @@ -457,10 +480,12 @@ class Outputs: def test_shell_missing_executable_dynamic(): - with pytest.raises(AttributeError, match="should contain an `executable`"): + with pytest.raises( + ValueError, + match=r"name \('A'\) can only be provided when creating a class dynamically", + ): shell.interface( - "A", - executable=None, + name="A", inputs={ "directory": shell.arg( type=Directory, @@ -477,24 +502,3 @@ def test_shell_missing_executable_dynamic(): ) }, ) - - -def test_shell_missing_inputs_static(): - with pytest.raises(AttributeError, match="should contain an `Inputs`"): - - @shell.interface - class A: - - class Outputs: - entries: list = shell.out( - help_string="list of entries returned by ls command", - callable=list_entries, - ) - - -def test_shell_decorator_misuse(A): - with pytest.raises( - RuntimeError, - match=("`shell.interface` should not be provided any other arguments"), - ): - shell.interface(A, executable="cp") From 6cf4e1b25b0da5973ecb7f6eb4d41b5a550ff5f0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 15 Nov 2024 11:29:35 +1100 Subject: [PATCH 024/342] added tests for shell command templates --- pydra/design/tests/test_python.py | 2 +- pydra/design/tests/test_shell.py | 163 ++++++++++++++++++++++++------ 2 files changed, 132 insertions(+), 33 deletions(-) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index ef175516ce..a8c954e51d 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -30,7 +30,7 @@ def sample_interface(a: int) -> float: ] -def test_interface_wrap_function_type(): +def test_interface_wrap_function_types(): def sample_interface(a: int) -> int: """Sample function with inputs and outputs""" return a * 2 diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 97fb786ced..f88cda604b 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -1,21 +1,101 @@ import os -import tempfile from operator import attrgetter from pathlib import Path import attrs import pytest import cloudpickle as cp from pydra.design import shell, Interface, list_fields -from fileformats.generic import File, Directory +from 
fileformats.generic import File, Directory, FsObject, SetOf +from fileformats import field -def list_entries(stdout): - return stdout.split("\n")[:-1] +def test_interface_template(): + + SampleInterface = shell.interface( + ( + "cp -R -v " + "--text-arg --int-arg " + # "--tuple-arg " + ), + ) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) + assert inputs == [ + shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.outarg(name="out_path", type=FsObject, position=2), + shell.arg(name="recursive", type=bool, position=3), + shell.arg(name="verbose", type=bool, position=4), + shell.arg(name="text_arg", type=field.Text, position=5), + shell.arg(name="int_arg", type=field.Integer, position=6), + # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + ] + assert outputs == [ + shell.outarg(name="out_path", type=FsObject, position=2), + ] -@pytest.fixture -def tmpdir(): - return Path(tempfile.mkdtemp()) +def test_interface_template_with_overrides(): + + RECURSIVE_HELP = ( + "If source_file designates a directory, cp copies the directory and the entire " + "subtree connected at that point." + ) + + SampleInterface = shell.interface( + ( + "cp -R -v " + "--text-arg --int-arg " + # "--tuple-arg " + ), + inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, + outputs={"out_path": shell.outarg(position=-1)}, + ) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) + assert inputs == [ + shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.arg(name="recursive", type=bool, help_string=RECURSIVE_HELP, position=2), + shell.arg(name="verbose", type=bool, position=3), + shell.arg(name="text_arg", type=field.Text, position=4), + shell.arg(name="int_arg", type=field.Integer, position=5), + # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + shell.outarg(name="out_path", type=FsObject, position=-1), + ] + assert outputs == [ + shell.outarg(name="out_path", type=FsObject, position=-1), + ] + + +def test_interface_template_with_type_overrides(): + + SampleInterface = shell.interface( + ( + "cp -R -v " + "--text-arg --int-arg " + # "--tuple-arg " + ), + inputs={"text_arg": str, "int_arg": int}, + ) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) + assert inputs == [ + shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.arg(name="recursive", type=bool, position=2), + shell.arg(name="verbose", type=bool, position=3), + shell.arg(name="text_arg", type=str, position=4), + shell.arg(name="int_arg", type=int, position=5), + # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + shell.outarg(name="out_path", type=FsObject, position=-1), + ] + assert outputs == [ + shell.outarg(name="out_path", type=FsObject, position=-1), + ] @pytest.fixture(params=["static", "dynamic"]) @@ -149,8 +229,8 @@ def test_shell_fields(Ls): assert [a.name for a in list_fields(Ls.Outputs)] == ["entries"] -def test_shell_pickle_roundtrip(Ls, tmpdir): - pkl_file = tmpdir / "ls.pkl" +def test_shell_pickle_roundtrip(Ls, tmp_path): + pkl_file = tmp_path / "ls.pkl" with open(pkl_file, "wb") as 
f: cp.dump(Ls, f) @@ -161,21 +241,21 @@ def test_shell_pickle_roundtrip(Ls, tmpdir): @pytest.mark.xfail(reason="Still need to update tasks to use new shell interface") -def test_shell_run(Ls, tmpdir): - Path.touch(tmpdir / "a") - Path.touch(tmpdir / "b") - Path.touch(tmpdir / "c") +def test_shell_run(Ls, tmp_path): + Path.touch(tmp_path / "a") + Path.touch(tmp_path / "b") + Path.touch(tmp_path / "c") - ls = Ls(directory=tmpdir, long_format=True) + ls = Ls(directory=tmp_path, long_format=True) # Test cmdline - assert ls.inputs.directory == tmpdir + assert ls.inputs.directory == tmp_path assert not ls.inputs.hidden assert ls.inputs.long_format - assert ls.cmdline == f"ls -l {tmpdir}" + assert ls.cmdline == f"ls -l {tmp_path}" # Drop Long format flag to make output simpler - ls = Ls(directory=tmpdir) + ls = Ls(directory=tmp_path) result = ls() assert result.output.entries == ["a", "b", "c"] @@ -328,7 +408,7 @@ def get_file_size(y: Path): return result.st_size -def test_shell_bases_dynamic(A, tmpdir): +def test_shell_bases_dynamic(A, tmp_path): B = shell.interface( name="B", inputs={ @@ -344,8 +424,8 @@ def test_shell_bases_dynamic(A, tmpdir): bases=[A], ) - xpath = tmpdir / "x.txt" - ypath = tmpdir / "y.txt" + xpath = tmp_path / "x.txt" + ypath = tmp_path / "y.txt" Path.touch(xpath) Path.touch(ypath) @@ -358,7 +438,7 @@ def test_shell_bases_dynamic(A, tmpdir): # assert result.output.y == str(ypath) -def test_shell_bases_static(A, tmpdir): +def test_shell_bases_static(A, tmp_path): @shell.interface class B(A): @@ -369,8 +449,8 @@ class Outputs: help_string="size of the output directory", callable=get_file_size ) - xpath = tmpdir / "x.txt" - ypath = tmpdir / "y.txt" + xpath = tmp_path / "x.txt" + ypath = tmp_path / "y.txt" Path.touch(xpath) Path.touch(ypath) @@ -383,7 +463,7 @@ class Outputs: # assert result.output.y == str(ypath) -def test_shell_inputs_outputs_bases_dynamic(tmpdir): +def test_shell_inputs_outputs_bases_dynamic(tmp_path): A = shell.interface( "ls", name="A", @@ -417,19 +497,19 @@ def test_shell_inputs_outputs_bases_dynamic(tmpdir): bases=[A], ) - hidden = File.sample(tmpdir, stem=".hidden") + hidden = File.sample(tmp_path, stem=".hidden") - b = B(directory=tmpdir, hidden=True) + b = B(directory=tmp_path, hidden=True) - assert b.directory == Directory(tmpdir) + assert b.directory == Directory(tmp_path) assert b.hidden # result = b() - # assert result.runner.cmdline == f"ls -a {tmpdir}" + # assert result.runner.cmdline == f"ls -a {tmp_path}" # assert result.output.entries == [".", "..", ".hidden"] -def test_shell_inputs_outputs_bases_static(tmpdir): +def test_shell_inputs_outputs_bases_static(tmp_path): @shell.interface class A: executable = "ls" @@ -452,11 +532,11 @@ class B(A): default=False, ) - Path.touch(tmpdir / ".hidden") + Path.touch(tmp_path / ".hidden") - b = B(directory=tmpdir, hidden=True) + b = B(directory=tmp_path, hidden=True) - assert b.directory == Directory(tmpdir) + assert b.directory == Directory(tmp_path) assert b.hidden # result = b() @@ -502,3 +582,22 @@ def test_shell_missing_executable_dynamic(): ) }, ) + + +def list_entries(stdout): + return stdout.split("\n")[:-1] + + +inp_sort_key = attrgetter("position") + + +def out_sort_key(out: shell.out) -> int: + LARGE_NUMBER = 1000000 + try: + pos = out.position + except AttributeError: + pos = LARGE_NUMBER + else: + if pos < 0: + pos = LARGE_NUMBER + pos + return pos From 8cbeade9f1e05d77a65b7328f970d73578550bd2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 15 Nov 2024 11:42:37 +1100 Subject: [PATCH 
025/342] cleaning up shell tests --- pydra/design/shell.py | 3 --- pydra/design/tests/test_shell.py | 25 ++++++++++++++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 4f816b0df0..fb6813c7b5 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -371,7 +371,6 @@ def parse_command_line_template( arguments = [] options = [] option = None - position = 1 def merge_or_create_field(name, field_type, type): """Merge the typing information with an existing field if it exists""" @@ -425,13 +424,11 @@ def add_option(opt): type_ = from_mime(type_str) if type_str is not None else ty.Any if option is None: arguments.append(merge_or_create_field(name, field_type, type_)) - position += 1 else: option[1].append((name, type_)) elif match := opt_re.match(token): if option is not None: add_option(option) - position += 1 option = (match.group(1), field_type, []) if option is not None: add_option(option) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index f88cda604b..76e3fed362 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -11,6 +11,23 @@ def test_interface_template(): + SampleInterface = shell.interface("cp ") + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) + assert inputs == [ + shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg(name="in_path", type=FsObject, position=1), + shell.outarg(name="out_path", type=FsObject, position=2), + ] + assert outputs == [ + shell.outarg(name="out_path", type=FsObject, position=2), + ] + + +def test_interface_template_more_complex(): + SampleInterface = shell.interface( ( "cp -R -v " @@ -23,7 +40,8 @@ def test_interface_template(): inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) assert inputs == [ - shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), shell.outarg(name="out_path", type=FsObject, position=2), shell.arg(name="recursive", type=bool, position=3), shell.arg(name="verbose", type=bool, position=4), @@ -57,7 +75,8 @@ def test_interface_template_with_overrides(): inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) assert inputs == [ - shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), shell.arg(name="recursive", type=bool, help_string=RECURSIVE_HELP, position=2), shell.arg(name="verbose", type=bool, position=3), shell.arg(name="text_arg", type=field.Text, position=4), @@ -85,7 +104,7 @@ def test_interface_template_with_type_overrides(): inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) assert inputs == [ - shell.arg(name="in_paths", type=SetOf[FsObject], position=1), + shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), shell.arg(name="recursive", type=bool, position=2), shell.arg(name="verbose", type=bool, position=3), shell.arg(name="text_arg", type=str, 
position=4), From 79e98f60de74cb1676d9a498fd8b75424e9bd332 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 11:58:21 +1100 Subject: [PATCH 026/342] capture defaults in pydra.design --- pydra/design/base.py | 72 +++++++++++++++++--------------- pydra/design/tests/test_shell.py | 12 +++--- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 24aaf35803..bd08997b4f 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -265,8 +265,12 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: # Get fields defined in base classes if present for field in list_fields(klass): fields_dict[field.name] = field + type_hints = ty.get_type_hints(klass) for atr_name in dir(klass): - if atr_name in fields_dict or atr_name.startswith("__"): + if atr_name in list(fields_dict) + [ + "Task", + "Outputs", + ] or atr_name.startswith("__"): continue try: atr = getattr(klass, atr_name) @@ -275,49 +279,39 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: if isinstance(atr, Field): atr.name = atr_name fields_dict[atr_name] = atr - for atr_name, type_ in klass.__annotations__.items(): - try: - fields_dict[atr_name].type = type_ - except KeyError: - if auto_attribs: - fields_dict[atr_name] = field_type(name=atr_name, type=type_) - for atr_name, help in helps.items(): - try: - fields_dict[atr_name].help_string = help - except KeyError: - pass + if atr_name in type_hints: + atr.type = type_hints[atr_name] + if not atr.help_string: + atr.help_string = helps.get(atr_name, "") + elif atr_name in type_hints and auto_attribs: + fields_dict[atr_name] = field_type( + name=atr_name, + type=type_hints[atr_name], + default=atr, + help_string=helps.get(atr_name, ""), + ) + if auto_attribs: + for atr_name, type_ in type_hints.items(): + if atr_name not in list(fields_dict) + ["Task", "Outputs"]: + fields_dict[atr_name] = field_type( + name=atr_name, type=type_, help_string=helps.get(atr_name, "") + ) return list(fields_dict.values()) inputs = get_fields(klass, arg_type, auto_attribs, input_helps) - outputs_klass = get_outputs_class(klass) + try: + outputs_klass = klass.Outputs + except AttributeError: + raise AttributeError( + f"Nested Outputs class not found in {klass.__name__}" + ) from None output_helps, _ = parse_doc_string(outputs_klass.__doc__) - if outputs_klass is None: - raise ValueError(f"Nested Outputs class not found in {klass.__name__}") outputs = get_fields(outputs_klass, out_type, auto_attribs, output_helps) return inputs, outputs -def get_outputs_class(klass: type | None = None) -> type | None: - """Get the Outputs class from the nested "Outputs" class or from the Interface class - args""" - if klass is None: - return None - try: - outputs_klass = klass.Outputs - except AttributeError: - try: - interface_class = next( - b for b in klass.__mro__ if ty.get_origin(b) is Interface - ) - except StopIteration: - outputs_klass = None - else: - outputs_klass = ty.get_args(interface_class)[0] - return outputs_klass - - def make_interface( task_type: type[Task], inputs: list[Arg], @@ -437,6 +431,7 @@ def allowed_values_validator(_, attribute, value): def extract_inputs_and_outputs_from_function( function: ty.Callable, + arg_type: type[Arg], inputs: list[str | Arg] | dict[str, Arg | type] | None = None, outputs: list[str | Out] | dict[str, Out | type] | type | None = None, ) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: @@ -445,8 +440,11 @@ def extract_inputs_and_outputs_from_function( sig = 
inspect.signature(function) type_hints = ty.get_type_hints(function) input_types = {} + input_defaults = {} for p in sig.parameters.values(): input_types[p.name] = type_hints.get(p.name, ty.Any) + if p.default is not inspect.Parameter.empty: + input_defaults[p.name] = p.default if inputs: if not isinstance(inputs, dict): raise ValueError( @@ -463,6 +461,12 @@ def extract_inputs_and_outputs_from_function( inpt.type = type_ else: inputs = input_types + for inpt_name, default in input_defaults.items(): + inpt = inputs[inpt_name] + if isinstance(inpt, arg_type) and inpt.default is EMPTY: + inpt.default = default + else: + inputs[inpt_name] = arg_type(type=inpt, default=default) return_type = type_hints.get("return", ty.Any) if outputs is None: src = inspect.getsource(function).strip() diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 76e3fed362..3c16ddec19 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -32,7 +32,7 @@ def test_interface_template_more_complex(): ( "cp -R -v " "--text-arg --int-arg " - # "--tuple-arg " + "--tuple-arg <:text> " ), ) @@ -47,7 +47,7 @@ def test_interface_template_more_complex(): shell.arg(name="verbose", type=bool, position=4), shell.arg(name="text_arg", type=field.Text, position=5), shell.arg(name="int_arg", type=field.Integer, position=6), - # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), ] assert outputs == [ shell.outarg(name="out_path", type=FsObject, position=2), @@ -65,7 +65,7 @@ def test_interface_template_with_overrides(): ( "cp -R -v " "--text-arg --int-arg " - # "--tuple-arg " + "--tuple-arg " ), inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, outputs={"out_path": shell.outarg(position=-1)}, @@ -81,7 +81,7 @@ def test_interface_template_with_overrides(): shell.arg(name="verbose", type=bool, position=3), shell.arg(name="text_arg", type=field.Text, position=4), shell.arg(name="int_arg", type=field.Integer, position=5), - # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), shell.outarg(name="out_path", type=FsObject, position=-1), ] assert outputs == [ @@ -95,7 +95,7 @@ def test_interface_template_with_type_overrides(): ( "cp -R -v " "--text-arg --int-arg " - # "--tuple-arg " + "--tuple-arg " ), inputs={"text_arg": str, "int_arg": int}, ) @@ -109,7 +109,7 @@ def test_interface_template_with_type_overrides(): shell.arg(name="verbose", type=bool, position=3), shell.arg(name="text_arg", type=str, position=4), shell.arg(name="int_arg", type=int, position=5), - # shell.arg(name="tuple_arg", type=tuple[field.Integer,field.Text], position=6), + shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), shell.outarg(name="out_path", type=FsObject, position=-1), ] assert outputs == [ From bba8be7139cd404e0b680f6c66c3eac4bafc4142 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 11:59:02 +1100 Subject: [PATCH 027/342] added function to python.interface fields instead of static --- pydra/design/python.py | 12 +- pydra/design/shell.py | 13 +- pydra/design/tests/test_python.py | 270 ++++++++++++++++++------------ 3 files changed, 182 insertions(+), 113 deletions(-) diff --git a/pydra/design/python.py b/pydra/design/python.py index 2aa43949df..a1f7a6d89f 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -70,7 
+70,7 @@ def make(wrapped: ty.Callable | type) -> Interface: function = wrapped input_helps, output_helps = parse_doc_string(function.__doc__) inferred_inputs, inferred_outputs = ( - extract_inputs_and_outputs_from_function(function, inputs, outputs) + extract_inputs_and_outputs_from_function(function, arg, inputs, outputs) ) name = function.__name__ @@ -82,6 +82,13 @@ def make(wrapped: ty.Callable | type) -> Interface: input_helps=input_helps, output_helps=output_helps, ) + + try: + parsed_inputs.remove(next(i for i in parsed_inputs if i.name == "function")) + except StopIteration: + pass + parsed_inputs.append(arg(name="function", type=ty.Callable, default=function)) + interface = make_interface( FunctionTask, parsed_inputs, @@ -91,8 +98,7 @@ def make(wrapped: ty.Callable | type) -> Interface: bases=bases, outputs_bases=outputs_bases, ) - # Set the function in the created class - interface.function = function + return interface if wrapped is not None: diff --git a/pydra/design/shell.py b/pydra/design/shell.py index fb6813c7b5..8f25e9666e 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -365,8 +365,11 @@ def parse_command_line_template( return template, inputs, outputs executable, args_str = parts tokens = re.split(r"\s+", args_str.strip()) - arg_re = re.compile(r"<([:a-zA-Z0-9\|\-\.\/\+]+)>") - opt_re = re.compile(r"--?(\w+)") + arg_pattern = r"<([:a-zA-Z0-9_\|\-\.\/\+]+)>" + opt_pattern = r"--?[a-zA-Z0-9_]+" + arg_re = re.compile(arg_pattern) + opt_re = re.compile(opt_pattern) + bool_arg_re = re.compile(f"({opt_pattern})({arg_pattern})") arguments = [] options = [] @@ -421,7 +424,6 @@ def add_option(opt): raise ValueError(f"Unknown type {type_str}") else: type_ = generic.FsObject if field_type is arg else field.Text - type_ = from_mime(type_str) if type_str is not None else ty.Any if option is None: arguments.append(merge_or_create_field(name, field_type, type_)) else: @@ -430,6 +432,11 @@ def add_option(opt): if option is not None: add_option(option) option = (match.group(1), field_type, []) + elif match := bool_arg_re.match(token): + if option is not None: + add_option(option) + option = None + add_option(match.group(1), arg, [(match.group(2), field.Boolean)]) if option is not None: add_option(option) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index a8c954e51d..f3a1e27dce 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -1,80 +1,122 @@ from operator import attrgetter -import pytest -from pydra import design +import typing as ty from decimal import Decimal -from pydra.design.python import arg, out, interface +import attrs +import pytest +from pydra.design import list_fields, Interface +from pydra.design import python from pydra.engine.task import FunctionTask +sort_key = attrgetter("name") + + def test_interface_wrap_function(): - def sample_interface(a: int) -> float: + def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = interface( - sample_interface, - inputs={"a": arg(help_string="The argument to be doubled")}, - outputs={"b": out(help_string="the doubled output", type=Decimal)}, - ) + SampleInterface = python.interface(func) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int), + python.arg(name="function", type=ty.Callable, default=func), + ] + assert 
outputs == [python.out(name="out", type=float)] + + +def test_interface_wrap_function_with_default(): + def func(a: int, k: float = 2.0) -> float: + """Sample function with inputs and outputs""" + return a * k + + SampleInterface = python.interface(func) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=int), + python.arg(name="function", type=ty.Callable, default=func), + python.arg(name="k", type=float, default=2.0), + ] + assert outputs == [python.out(name="out", type=float)] + - assert issubclass(SampleInterface, design.Interface) - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") +def test_interface_wrap_function_overrides(): + def func(a: int) -> float: + """Sample function with inputs and outputs""" + return a * 2 + + SampleInterface = python.interface( + func, + inputs={"a": python.arg(help_string="The argument to be doubled")}, + outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, ) + + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="The argument to be doubled"), + python.arg(name="a", type=int, help_string="The argument to be doubled"), + python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [ - out(name="b", type=Decimal, help_string="the doubled output"), + python.out(name="b", type=Decimal, help_string="the doubled output"), ] def test_interface_wrap_function_types(): - def sample_interface(a: int) -> int: + def func(a: int) -> int: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = interface( - sample_interface, + SampleInterface = python.interface( + func, inputs={"a": float}, outputs={"b": float}, ) - assert issubclass(SampleInterface, design.Interface) - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) - assert inputs == [arg(name="a", type=float)] - assert outputs == [out(name="b", type=float)] + assert issubclass(SampleInterface, Interface) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert inputs == [ + python.arg(name="a", type=float), + python.arg(name="function", type=ty.Callable, default=func), + ] + assert outputs == [python.out(name="b", type=float)] def test_decorated_function_interface(): - @design.python.interface(outputs=["c", "d"]) + @python.interface(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" return a + b, a * b - assert issubclass(SampleInterface, design.Interface) + assert issubclass(SampleInterface, Interface) assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int), - arg(name="b", 
type=float), + python.arg(name="a", type=int), + python.arg(name="b", type=float), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float), - out(name="d", type=float), + python.out(name="c", type=float), + python.out(name="d", type=float), ] - assert SampleInterface.function.__name__ == "SampleInterface" def test_interface_with_function_implicit_outputs_from_return_stmt(): - @design.python.interface + @python.interface def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" c = a + b @@ -82,23 +124,25 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return c, d assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int), - arg(name="b", type=float), + python.arg(name="a", type=int), + python.arg(name="b", type=float), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float), - out(name="d", type=float), + python.out(name="c", type=float), + python.out(name="d", type=float), ] - assert SampleInterface.function.__name__ == "SampleInterface" def test_interface_with_function_docstr(): - @design.python.interface(outputs=["c", "d"]) + @python.interface(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing @@ -110,23 +154,25 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="First input to be inputted"), - arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help_string="First input to be inputted"), + python.arg(name="b", type=float, help_string="Second input"), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float, help_string="Sum of a and b"), - out(name="d", type=float, help_string="product of a and b"), + python.out(name="c", type=float, help_string="Sum of a and b"), + python.out(name="d", type=float, help_string="product of a and b"), ] - assert SampleInterface.function.__name__ == "SampleInterface" def test_interface_with_function_google_docstr(): - @design.python.interface(outputs=["c", "d"]) + @python.interface(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing @@ -142,23 +188,25 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = 
sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="First input to be inputted"), - arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help_string="First input to be inputted"), + python.arg(name="b", type=float, help_string="Second input"), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float, help_string="Sum of a and b"), - out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help_string="Sum of a and b"), + python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleInterface.function.__name__ == "SampleInterface" def test_interface_with_function_numpy_docstr(): - @design.python.interface( + @python.interface( outputs=["c", "d"] ) # Could potentiall read output names from doc-string instead def SampleInterface(a: int, b: float) -> tuple[float, float]: @@ -182,23 +230,25 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="First input to be inputted"), - arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help_string="First input to be inputted"), + python.arg(name="b", type=float, help_string="Second input"), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float, help_string="Sum of a and b"), - out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help_string="Sum of a and b"), + python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleInterface.function.__name__ == "SampleInterface" def test_interface_with_class(): - @design.python.interface + @python.interface class SampleInterface: """Sample class for testing @@ -209,7 +259,7 @@ class SampleInterface: """ a: int - b: float + b: float = 2.0 class Outputs: """ @@ -225,26 +275,29 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, design.Interface) + assert issubclass(SampleInterface, Interface) assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="First input to be inputted"), - arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help_string="First input to be inputted"), + python.arg(name="b", type=float, default=2.0, help_string="Second input"), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float, help_string="Sum 
of a and b"), - out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help_string="Sum of a and b"), + python.out(name="d", type=float, help_string="Product of a and b"), ] assert SampleInterface.function.__name__ == "function" def test_interface_with_inheritance(): - @design.python.interface - class SampleInterface(design.Interface["SampleInterface.Outputs"]): + @python.interface + class SampleInterface(Interface["SampleInterface.Outputs"]): """Sample class for testing Args: @@ -270,20 +323,20 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, design.Interface) + assert issubclass(SampleInterface, Interface) def test_interface_with_class_no_auto_attribs(): - @design.python.interface(auto_attribs=False) + @python.interface(auto_attribs=False) class SampleInterface: - a: int = arg(help_string="First input to be inputted") - b: float = arg(help_string="Second input") + a: int = python.arg(help_string="First input to be inputted") + b: float = python.arg(help_string="Second input") x: int class Outputs: - c: float = out(help_string="Sum of a and b") - d: float = out(help_string="Product of a and b") + c: float = python.out(help_string="Sum of a and b") + d: float = python.out(help_string="Product of a and b") y: str @@ -292,17 +345,20 @@ def function(a, b): return a + b, a * b assert SampleInterface.Task is FunctionTask - inputs = sorted(design.list_fields(SampleInterface), key=attrgetter("name")) - outputs = sorted( - design.list_fields(SampleInterface.Outputs), key=attrgetter("name") - ) + inputs = sorted(list_fields(SampleInterface), key=sort_key) + outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ - arg(name="a", type=int, help_string="First input to be inputted"), - arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help_string="First input to be inputted"), + python.arg(name="b", type=float, help_string="Second input"), + python.arg( + name="function", + type=ty.Callable, + default=attrs.fields(SampleInterface).function.default, + ), ] assert outputs == [ - out(name="c", type=float, help_string="Sum of a and b"), - out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help_string="Sum of a and b"), + python.out(name="d", type=float, help_string="Product of a and b"), ] assert SampleInterface.function.__name__ == "function" @@ -310,8 +366,8 @@ def function(a, b): def test_interface_invalid_wrapped1(): with pytest.raises(ValueError): - @design.python.interface(inputs={"a": arg()}) - class SampleInterface(design.Interface["SampleInterface.Outputs"]): + @python.interface(inputs={"a": python.arg()}) + class SampleInterface(Interface["SampleInterface.Outputs"]): a: int class Outputs: @@ -325,8 +381,8 @@ def function(a): def test_interface_invalid_wrapped2(): with pytest.raises(ValueError): - @design.python.interface(outputs={"b": out()}) - class SampleInterface(design.Interface["SampleInterface.Outputs"]): + @python.interface(outputs={"b": python.out()}) + class SampleInterface(Interface["SampleInterface.Outputs"]): a: int class Outputs: From 7ab7713e47f0a5939178df8d6e761ad825832f3c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 16:50:36 +1100 Subject: [PATCH 028/342] reworked and debugged shell design tests --- pydra/design/base.py | 78 ++++--- pydra/design/shell.py | 361 +++++++----------------------- pydra/design/tests/test_python.py | 5 + pydra/design/tests/test_shell.py | 
262 ++++++++++++++-------- pydra/utils/typing.py | 20 +- 5 files changed, 318 insertions(+), 408 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index bd08997b4f..1ca7aa813c 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -3,11 +3,12 @@ import inspect import re import enum +from pathlib import Path from copy import copy import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File -from pydra.utils.typing import TypeParser +from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union from pydra.engine.helpers import from_list_if_single, ensure_list from pydra.engine.specs import ( LazyField, @@ -61,10 +62,8 @@ class Field: The type of the field, by default it is Any help_string: str, optional A short description of the input field. - mandatory: bool, optional - If True user has to provide a value for the field, by default it is False requires: list, optional - List of field names that are required together with the field. + Names of the inputs that are required together with the field. converter: callable, optional The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional @@ -76,7 +75,6 @@ class Field: validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) ) help_string: str = "" - mandatory: bool = False requires: list | None = None converter: ty.Callable | None = None validator: ty.Callable | None = None @@ -92,14 +90,12 @@ class Arg(Field): A short description of the input field. default : Any, optional the default value for the argument - mandatory: bool, optional - If True user has to provide a value for the field, by default it is False allowed_values: list, optional List of allowed values for the field. requires: list, optional - List of field names that are required together with the field. + Names of the inputs that are required together with the field. xor: list, optional - List of field names that are mutually exclusive with the field. + Names of the inputs that are mutually exclusive with the field. 
copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -240,7 +236,7 @@ def collate_fields( outputs = [ ( o - if isinstance(o, out_type) + if isinstance(o, Out) else out_type(name=n, type=o, help_string=output_helps.get(n, "")) ) for n, o in outputs.items() @@ -267,10 +263,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: fields_dict[field.name] = field type_hints = ty.get_type_hints(klass) for atr_name in dir(klass): - if atr_name in list(fields_dict) + [ - "Task", - "Outputs", - ] or atr_name.startswith("__"): + if atr_name in ["Task", "Outputs"] or atr_name.startswith("__"): continue try: atr = getattr(klass, atr_name) @@ -283,13 +276,16 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: atr.type = type_hints[atr_name] if not atr.help_string: atr.help_string = helps.get(atr_name, "") - elif atr_name in type_hints and auto_attribs: - fields_dict[atr_name] = field_type( - name=atr_name, - type=type_hints[atr_name], - default=atr, - help_string=helps.get(atr_name, ""), - ) + elif atr_name in type_hints: + if atr_name in fields_dict: + fields_dict[atr_name].type = type_hints[atr_name] + elif auto_attribs: + fields_dict[atr_name] = field_type( + name=atr_name, + type=type_hints[atr_name], + default=atr, + help_string=helps.get(atr_name, ""), + ) if auto_attribs: for atr_name, type_ in type_hints.items(): if atr_name not in list(fields_dict) + ["Task", "Outputs"]: @@ -312,6 +308,16 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: return inputs, outputs +def _get_default(field: Field) -> ty.Any: + if not hasattr(field, "default"): + return attrs.NOTHING + if field.default is not EMPTY: + return field.default + if is_optional(field.type): + return None + return attrs.NOTHING + + def make_interface( task_type: type[Task], inputs: list[Arg], @@ -332,6 +338,7 @@ def make_interface( o.name: attrs.field( converter=get_converter(o, f"{name}.Outputs"), metadata={PYDRA_ATTR_METADATA: o}, + default=_get_default(o), ) for o in outputs }, @@ -363,18 +370,33 @@ def make_interface( klass.Outputs = outputs_klass # Now that we have saved the attributes in lists to be for arg in inputs: + # If an outarg input then the field type should be Path not a FileSet + if isinstance(arg, Out) and is_fileset_or_union(arg.type): + if getattr(arg, "path_template", False): + if is_optional(arg.type): + field_type = Path | bool | None + # Will default to None and not be inserted into the command + else: + field_type = Path | bool + arg.default = True + elif is_optional(arg.type): + field_type = Path | None + else: + field_type = Path + else: + field_type = arg.type setattr( klass, arg.name, attrs.field( - default=arg.default if arg.default is not EMPTY else attrs.NOTHING, - converter=get_converter(arg, klass.__name__), + default=_get_default(arg), + converter=get_converter(arg, klass.__name__, field_type), validator=get_validator(arg, klass.__name__), metadata={PYDRA_ATTR_METADATA: arg}, on_setattr=attrs.setters.convert, ), ) - klass.__annotations__[arg.name] = arg.type + klass.__annotations__[arg.name] = field_type # Create class using attrs package, will create attributes for all columns and # parameters @@ -383,10 +405,12 @@ def make_interface( return attrs_klass -def get_converter(field: Field, interface_name: str): +def get_converter(field: Field, interface_name: str, field_type: ty.Type | None = None): + if field_type is None: + field_type = field.type 
checker_label = f"'{field.name}' field of {interface_name} interface" - type_checker = TypeParser[field.type]( - field.type, label=checker_label, superclass_auto_cast=True + type_checker = TypeParser[field_type]( + field_type, label=checker_label, superclass_auto_cast=True ) converters = [] if field.type in (MultiInputObj, MultiInputFile): diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 8f25e9666e..76430e7a50 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -18,6 +18,7 @@ collate_fields, Interface, make_interface, + EMPTY, ) from pydra.engine.task import ShellCommandTask @@ -81,11 +82,22 @@ class arg(Arg): argstr: str | None = "" position: int | None = None - sep: str | None = None + sep: str | None = attrs.field(default=None) allowed_values: list | None = None - container_path: bool = False + container_path: bool = False # IS THIS STILL USED?? formatter: ty.Callable | None = None + @sep.validator + def _validate_sep(self, attribute, value): + if ( + value is not None + and self.type is not ty.Any + and not issubclass(self.type, ty.Iterable) + ): + raise ValueError( + f"sep ({value!r}) can only be provided when type is iterable" + ) + @attrs.define(kw_only=True) class out(Out): @@ -158,18 +170,22 @@ class outarg(Out, arg): function can take field (this input field will be passed to the function), inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). - file_template: str, optional + path_template: str, optional If provided, the field is treated also as an output field and it is added to the output spec. The template can use other fields, e.g. {file1}. Used in order to create an output specification. - template_field: str, optional - If provided the field is added to the output spec with changed name. Used in - order to create an output specification. 
Used together with output_file_template """ - file_template: str | None = None - template_field: str | None = None + path_template: str | None = attrs.field(default=None) + + @path_template.validator + def _validate_path_template(self, attribute, value): + if value and self.default not in (EMPTY, True, None): + raise ValueError( + f"path_template ({value!r}) can only be provided when no default " + f"({self.default!r}) is provided" + ) def interface( @@ -180,7 +196,6 @@ def interface( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, - args_last: bool = False, name: str | None = None, ) -> Interface: """Create a shell command interface @@ -234,7 +249,6 @@ def make( executable, inferred_inputs, inferred_outputs = parse_command_line_template( wrapped, - args_last=args_last, inputs=inputs, outputs=outputs, ) @@ -247,7 +261,9 @@ def make( input_helps=input_helps, output_helps=output_helps, ) - class_name = executable if not name else name + class_name = re.sub(r"[^\w]", "_", executable) if not name else name + if class_name[0].isdigit(): + class_name = f"_{class_name}" # Update the inputs (overriding inputs from base classes) with the executable # and the output argument fields @@ -263,6 +279,7 @@ def make( for inpt in parsed_inputs: if inpt.position is None: inpt.position = position_stack.pop() + parsed_inputs.sort(key=lambda x: x.position) interface = make_interface( ShellCommandTask, @@ -307,7 +324,6 @@ def parse_command_line_template( template: str, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, outputs: list[str | Out] | dict[str, Out | type] | type | None = None, - args_last: bool = False, ) -> ty.Tuple[str, dict[str, Arg | type], dict[str, Out | type]]: """Parses a command line template into a name and input and output fields. 
Fields are inferred from the template if not provided, where inputs are specified with `` @@ -335,17 +351,14 @@ def parse_command_line_template( The input fields of the shell command outputs : list[str | Out] | dict[str, Out | type] | type | None The output fields of the shell command - args_last : bool - Whether to put the executable argument last in the command line instead of first - as they appear in the template Returns ------- executable : str The name of the command line template - inputs : dict + inputs : dict[str, Arg | type] The input fields of the command line template - outputs : dict + outputs : dict[str, Out | type] The output fields of the command line template """ if isinstance(inputs, list): @@ -365,17 +378,16 @@ def parse_command_line_template( return template, inputs, outputs executable, args_str = parts tokens = re.split(r"\s+", args_str.strip()) - arg_pattern = r"<([:a-zA-Z0-9_\|\-\.\/\+]+)>" + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) bool_arg_re = re.compile(f"({opt_pattern})({arg_pattern})") arguments = [] - options = [] option = None - def merge_or_create_field(name, field_type, type): + def merge_or_create_field(name, field_type, kwds): """Merge the typing information with an existing field if it exists""" if isinstance(field_type, out): dct = outputs @@ -384,50 +396,77 @@ def merge_or_create_field(name, field_type, type): try: field = dct.pop(name) except KeyError: - field = field_type(name=name, type=type_) + field = field_type(name=name, **kwds) else: if isinstance(field, dict): field = field_type(**field) elif not isinstance(field, field_type): # If field type is outarg not out field = field_type(**attrs.asdict(field)) field.name = name + type_ = kwds.pop("type", field.type) if field.type is ty.Any: field.type = type_ + for k, v in kwds.items(): + setattr(field, k, v) + return field def add_option(opt): - name, field_type, type_ = opt - if len(type_) > 1: - type_ = tuple[tuple(type_)] + name, field_type, kwds = opt + if kwds["type"] is not bool: + kwds["type"] |= None + arguments.append(merge_or_create_field(name, field_type, type_)) + + def from_type_str(type_str) -> type: + types = [] + for tp in type_str.split(","): + if "/" in tp: + type_ = from_mime(tp) + else: + try: + type_ = from_mime(f"field/{tp}") + except FormatRecognitionError: + try: + type_ = from_mime(f"generic/{tp}") + except FormatRecognitionError: + raise ValueError(f"Unknown type {tp}") + types.append(type_) + if len(types) > 1: + type_ = tuple[types] else: - type_ = type_[0] - options.append(merge_or_create_field(name, field_type, type_)) + type_ = types[0] + return type_ for token in tokens: if match := arg_re.match(token): - name = match.group() + name = match.group(1) if name.startswith("out|"): name = name[4:] field_type = outarg else: field_type = arg + # Identify type after ':' symbols if ":" in name: name, type_str = name.split(":") - if "/" in type_str: - type_ = from_mime(type_str) - else: - try: - type_ = from_mime(f"field/{type_str}") - except FormatRecognitionError: - try: - type_ = from_mime(f"generic/{type_str}") - except FormatRecognitionError: - raise ValueError(f"Unknown type {type_str}") + type_ = from_type_str(type_str) else: - type_ = generic.FsObject if field_type is arg else field.Text + type_ = generic.FsObject if option is None else field.Text + kwds = {"type": type_} + # If name contains a '.', treat it as a file template and strip it from the name + if "." 
in name: + if field_type is not outarg: + raise ValueError( + f"File template fields (i.e. with '.' in their names) can only " + f"be used with file types, not {type_} and {field_type}" + ) + kwds["path_template"] = name + name = name.split(".")[0] + elif field_type is outarg: + kwds["path_template"] = name + if option is None: - arguments.append(merge_or_create_field(name, field_type, type_)) + arguments.append(merge_or_create_field(name, field_type, kwds)) else: - option[1].append((name, type_)) + option[1].append((name, kwds)) elif match := opt_re.match(token): if option is not None: add_option(option) @@ -443,9 +482,7 @@ def add_option(opt): inferred_inputs = [] inferred_outputs = [] - all_args = options + arguments if args_last else arguments + options - - for i, argument in enumerate(all_args, start=1): + for i, argument in enumerate(arguments, start=1): argument.position = i if isinstance(argument, outarg): inferred_outputs.append(argument) @@ -492,241 +529,3 @@ def remaining_positions(args: list[Arg], num_args: int | None = None) -> ty.List f"Multiple fields have the overlapping positions: {multiple_positions}" ) return [i for i in range(num_args) if i not in positions] - - -# def interface( -# klass_or_name: ty.Union[type, str], -# executable: ty.Optional[str] = None, -# input_fields: ty.Optional[dict[str, dict]] = None, -# output_fields: ty.Optional[dict[str, dict]] = None, -# bases: ty.Optional[list[type]] = None, -# inputs_bases: ty.Optional[list[type]] = None, -# outputs_bases: ty.Optional[list[type]] = None, -# ) -> type: -# """ -# Construct an analysis class and validate all the components fit together - -# Parameters -# ---------- -# klass_or_name : type or str -# Either the class decorated by the @shell_task decorator or the name for a -# dynamically generated class -# executable : str, optional -# If dynamically constructing a class (instead of decorating an existing one) the -# name of the executable to run is provided -# input_fields : dict[str, dict], optional -# If dynamically constructing a class (instead of decorating an existing one) the -# input fields can be provided as a dictionary of dictionaries, where the keys -# are the name of the fields and the dictionary contents are passed as keyword -# args to cmd_arg, with the exception of "type", which is used as the type annotation -# of the field. -# output_fields : dict[str, dict], optional -# If dynamically constructing a class (instead of decorating an existing one) the -# output fields can be provided as a dictionary of dictionaries, where the keys -# are the name of the fields and the dictionary contents are passed as keyword -# args to cmd_out, with the exception of "type", which is used as the type annotation -# of the field. 
-# bases : list[type] -# Base classes for dynamically constructed shell command classes -# inputs_bases : list[type] -# Base classes for the input spec of dynamically constructed shell command classes -# outputs_bases : list[type] -# Base classes for the input spec of dynamically constructed shell command classes - -# Returns -# ------- -# type -# the shell command task class -# """ - -# annotations = { -# "executable": str, -# "Outputs": type, -# } -# dct = {"__annotations__": annotations} - -# if isinstance(klass_or_name, str): -# # Dynamically created classes using shell_task as a function -# name = klass_or_name - -# if executable is not None: -# dct["executable"] = executable -# if input_fields is None: -# input_fields = {} -# if output_fields is None: -# output_fields = {} -# bases = list(bases) if bases is not None else [] -# inputs_bases = list(inputs_bases) if inputs_bases is not None else [] -# outputs_bases = list(outputs_bases) if outputs_bases is not None else [] - -# # Ensure base classes included somewhere in MRO -# def ensure_base_included(base_class: type, bases_list: list[type]): -# if not any(issubclass(b, base_class) for b in bases_list): -# bases_list.append(base_class) - -# # Get inputs and outputs bases from base class if not explicitly provided -# for base in bases: -# if not inputs_bases: -# try: -# inputs_bases = [base.Inputs] -# except AttributeError: -# pass -# if not outputs_bases: -# try: -# outputs_bases = [base.Outputs] -# except AttributeError: -# pass - -# # Ensure bases are lists and can be modified -# ensure_base_included(pydra.engine.task.ShellCommandTask, bases) -# ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) -# ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) - -# def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): -# annotations = {} -# attrs_dict = {"__annotations__": annotations} -# for name, dct in fields.items(): -# kwargs = dict(dct) # copy to avoid modifying input to outer function -# annotations[name] = kwargs.pop("type") -# attrs_dict[name] = attrs_func(**kwargs) -# return attrs_dict - -# Inputs = attrs.define(kw_only=True, slots=False)( -# type( -# "Inputs", -# tuple(inputs_bases), -# convert_to_attrs(input_fields, arg), -# ) -# ) - -# Outputs = attrs.define(kw_only=True, slots=False)( -# type( -# "Outputs", -# tuple(outputs_bases), -# convert_to_attrs(output_fields, out), -# ) -# ) - -# else: -# # Statically defined classes using shell_task as decorator -# if ( -# executable, -# input_fields, -# output_fields, -# bases, -# inputs_bases, -# outputs_bases, -# ) != (None, None, None, None, None, None): -# raise RuntimeError( -# "When used as a decorator on a class, `shell_task` should not be " -# "provided any other arguments" -# ) -# klass = klass_or_name -# name = klass.__name__ - -# bases = [klass] -# if not issubclass(klass, pydra.engine.task.ShellCommandTask): -# bases.append(pydra.engine.task.ShellCommandTask) - -# try: -# executable = klass.executable -# except AttributeError: -# raise RuntimeError( -# "Classes decorated by `shell_task` should contain an `executable` " -# "attribute specifying the shell tool to run" -# ) -# try: -# Inputs = klass.Inputs -# except AttributeError: -# raise RuntimeError( -# "Classes decorated by `shell_task` should contain an `Inputs` class " -# "attribute specifying the inputs to the shell tool" -# ) - -# try: -# Outputs = klass.Outputs -# except AttributeError: -# Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) - -# # Pass 
Inputs and Outputs in attrs.define if they are present in klass (i.e. -# # not in a base class) -# if "Inputs" in klass.__dict__: -# Inputs = attrs.define(kw_only=True, slots=False)(Inputs) -# if "Outputs" in klass.__dict__: -# Outputs = attrs.define(kw_only=True, slots=False)(Outputs) - -# if not issubclass(Inputs, pydra.engine.specs.ShellSpec): -# Inputs = attrs.define(kw_only=True, slots=False)( -# type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) -# ) - -# template_fields = _gen_output_template_fields(Inputs, Outputs) - -# if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): -# outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) -# add_base_class = True -# else: -# outputs_bases = (Outputs,) -# add_base_class = False - -# if add_base_class or template_fields: -# Outputs = attrs.define(kw_only=True, slots=False)( -# type("Outputs", outputs_bases, template_fields) -# ) - -# dct["Inputs"] = Inputs -# dct["Outputs"] = Outputs - -# task_klass = type(name, tuple(bases), dct) - -# if not hasattr(task_klass, "executable"): -# raise RuntimeError( -# "Classes generated by `shell_task` should contain an `executable` " -# "attribute specifying the shell tool to run" -# ) - -# task_klass.input_spec = pydra.engine.specs.SpecInfo( -# name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) -# ) -# task_klass.output_spec = pydra.engine.specs.SpecInfo( -# name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) -# ) - -# return task_klass - - -# def _gen_output_template_fields(Inputs: type, Outputs: type) -> dict: -# """Auto-generates output fields for inputs that specify an 'output_file_template' - -# Parameters -# ---------- -# Inputs : type -# Inputs specification class -# Outputs : type -# Outputs specification class - -# Returns -# ------- -# template_fields: dict[str, attrs._make_CountingAttribute] -# the template fields to add to the output spec -# """ -# annotations = {} -# template_fields = {"__annotations__": annotations} -# output_field_names = [f.name for f in attrs.fields(Outputs)] -# for fld in attrs.fields(Inputs): -# if "output_file_template" in fld.metadata: -# if "output_field_name" in fld.metadata: -# field_name = fld.metadata["output_field_name"] -# else: -# field_name = fld.name -# # skip adding if the field already in the output_spec -# exists_already = field_name in output_field_names -# if not exists_already: -# metadata = { -# "help_string": fld.metadata["help_string"], -# "mandatory": fld.metadata["mandatory"], -# "keep_extension": fld.metadata["keep_extension"], -# } -# template_fields[field_name] = attrs.field(metadata=metadata) -# annotations[field_name] = str -# return template_fields diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index f3a1e27dce..2b08231932 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -113,6 +113,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="c", type=float), python.out(name="d", type=float), ] + assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" def test_interface_with_function_implicit_outputs_from_return_stmt(): @@ -139,6 +140,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="c", type=float), python.out(name="d", type=float), ] + assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" def test_interface_with_function_docstr(): @@ -169,6 +171,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: 
python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="product of a and b"), ] + assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" def test_interface_with_function_google_docstr(): @@ -203,6 +206,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] + assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" def test_interface_with_function_numpy_docstr(): @@ -245,6 +249,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] + assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" def test_interface_with_class(): diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 3c16ddec19..fbc2888887 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -1,12 +1,11 @@ import os -from operator import attrgetter from pathlib import Path import attrs import pytest import cloudpickle as cp from pydra.design import shell, Interface, list_fields from fileformats.generic import File, Directory, FsObject, SetOf -from fileformats import field +from fileformats import field, text, image def test_interface_template(): @@ -14,44 +13,90 @@ def test_interface_template(): SampleInterface = shell.interface("cp ") assert issubclass(SampleInterface, Interface) - inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) - assert inputs == [ + output = shell.outarg( + name="out_path", + path_template="out_path", + default=True, + type=FsObject, + position=2, + ) + assert list_fields(SampleInterface) == [ shell.arg(name="executable", default="cp", type=str, position=0), shell.arg(name="in_path", type=FsObject, position=1), - shell.outarg(name="out_path", type=FsObject, position=2), + output, ] - assert outputs == [ - shell.outarg(name="out_path", type=FsObject, position=2), + assert list_fields(SampleInterface.Outputs) == [output] + intf = SampleInterface(in_path=File.mock("in-path.txt")) + assert intf.executable == "cp" + SampleInterface(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) + SampleInterface.Outputs(out_path=File.mock("in-path.txt")) + + +def test_interface_template_w_types_and_path_template_ext(): + + SampleInterface = shell.interface( + "trim-png " + ) + + assert issubclass(SampleInterface, Interface) + output = shell.outarg( + name="out_image", + path_template="out_image.png", + default=True, + type=image.Png, + position=2, + ) + assert list_fields(SampleInterface) == [ + shell.arg(name="executable", default="trim-png", type=str, position=0), + shell.arg(name="in_image", type=image.Png, position=1), + output, ] + assert list_fields(SampleInterface.Outputs) == [output] + SampleInterface(in_image=image.Png.mock()) + SampleInterface(in_image=image.Png.mock(), out_image=Path("./new_image.png")) + SampleInterface.Outputs(out_image=image.Png.mock()) def test_interface_template_more_complex(): SampleInterface = shell.interface( ( - "cp -R -v " + "cp " + "-R " "--text-arg --int-arg " - "--tuple-arg <:text> " + "--tuple-arg " ), ) assert issubclass(SampleInterface, Interface) - inputs = sorted(list_fields(SampleInterface), 
key=inp_sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) - assert inputs == [ + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=2, + default=True, + ) + assert sorted(list_fields(SampleInterface), key=pos_key) == [ shell.arg(name="executable", default="cp", type=str, position=0), - shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), - shell.outarg(name="out_path", type=FsObject, position=2), - shell.arg(name="recursive", type=bool, position=3), - shell.arg(name="verbose", type=bool, position=4), - shell.arg(name="text_arg", type=field.Text, position=5), - shell.arg(name="int_arg", type=field.Integer, position=6), - shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), - ] - assert outputs == [ - shell.outarg(name="out_path", type=FsObject, position=2), + shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), + output, + shell.arg(name="recursive", arg_str="-R", type=bool, position=3), + shell.arg( + name="text_arg", arg_str="--text-arg", type=field.Text | None, position=5 + ), + shell.arg( + name="int_arg", arg_str="--int-arg", type=field.Integer | None, position=6 + ), + shell.arg( + name="tuple_arg", + arg_str="--tuple-arg", + type=tuple[field.Integer, field.Text] | None, + position=6, + ), ] + assert list_fields(SampleInterface.Outputs) == [output] + SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) + SampleInterface.Outputs(out_path=File.sample()) def test_interface_template_with_overrides(): @@ -63,58 +108,84 @@ def test_interface_template_with_overrides(): SampleInterface = shell.interface( ( - "cp -R -v " + "cp " + "-R " "--text-arg --int-arg " "--tuple-arg " ), inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, - outputs={"out_path": shell.outarg(position=-1)}, + outputs={"out_dir": shell.outarg(position=-1)}, ) assert issubclass(SampleInterface, Interface) - inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) - assert inputs == [ + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=-1, + default=True, + ) + assert list_fields(SampleInterface) == [ shell.arg(name="executable", default="cp", type=str, position=0), - shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), - shell.arg(name="recursive", type=bool, help_string=RECURSIVE_HELP, position=2), - shell.arg(name="verbose", type=bool, position=3), - shell.arg(name="text_arg", type=field.Text, position=4), - shell.arg(name="int_arg", type=field.Integer, position=5), - shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), - shell.outarg(name="out_path", type=FsObject, position=-1), - ] - assert outputs == [ - shell.outarg(name="out_path", type=FsObject, position=-1), + shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), + shell.arg( + name="recursive", + arg_str="-R", + type=bool, + help_string=RECURSIVE_HELP, + position=2, + ), + shell.arg( + name="text_arg", argstr="--text-arg", type=field.Text | None, position=3 + ), + shell.arg( + name="int_arg", argstr="--int-arg", type=field.Integer | None, position=4 + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[field.Integer, field.Text] | None, + position=5, + ), + output, ] + assert list_fields(SampleInterface.Outputs) == [output] def test_interface_template_with_type_overrides(): 
SampleInterface = shell.interface( ( - "cp -R -v " + "cp " + "-R " "--text-arg --int-arg " "--tuple-arg " ), - inputs={"text_arg": str, "int_arg": int}, + inputs={"text_arg": str | None, "int_arg": int | None}, ) assert issubclass(SampleInterface, Interface) - inputs = sorted(list_fields(SampleInterface), key=inp_sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=out_sort_key) - assert inputs == [ - shell.arg(name="in_paths", type=SetOf[FsObject], position=1, sep=" "), - shell.arg(name="recursive", type=bool, position=2), - shell.arg(name="verbose", type=bool, position=3), - shell.arg(name="text_arg", type=str, position=4), - shell.arg(name="int_arg", type=int, position=5), - shell.arg(name="tuple_arg", type=tuple[field.Integer, field.Text], position=6), - shell.outarg(name="out_path", type=FsObject, position=-1), - ] - assert outputs == [ - shell.outarg(name="out_path", type=FsObject, position=-1), + output = shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=-1, + default=True, + ) + assert list_fields(SampleInterface) == [ + shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), + shell.arg(name="recursive", argstr="-R", type=bool, position=2), + shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=4), + shell.arg(name="int_arg", argstr="--text-arg", type=int | None, position=5), + shell.arg( + name="tuple_arg", + targstr="--text-arg", + ype=tuple[field.Integer, field.Text] | None, + position=6, + ), + output, ] + assert list_fields(SampleInterface.Outputs) == [output] @pytest.fixture(params=["static", "dynamic"]) @@ -128,7 +199,6 @@ class Ls(Interface["Ls.Outputs"]): directory: Directory = shell.arg( help_string="the directory to list the contents of", argstr="", - mandatory=True, position=-1, ) hidden: bool = shell.arg( @@ -157,10 +227,10 @@ class Ls(Interface["Ls.Outputs"]): requires=["long_format"], xor=["date_format_str"], ) - date_format_str: str = shell.arg( + date_format_str: str | None = shell.arg( help_string="format string for ", argstr="-D", - default=attrs.NOTHING, + default=None, requires=["long_format"], xor=["complete_date"], ) @@ -179,7 +249,6 @@ class Outputs: type=Directory, help_string="the directory to list the contents of", argstr="", - mandatory=True, position=-1, ), "hidden": shell.arg( @@ -209,7 +278,7 @@ class Outputs: xor=["date_format_str"], ), "date_format_str": shell.arg( - type=str, + type=str | None, help_string="format string for ", argstr="-D", requires=["long_format"], @@ -306,7 +375,7 @@ class Outputs: y : File path of output file""" - y: File = shell.outarg(file_template="{x}_out", position=-1) + y: File = shell.outarg(path_template="{x}_out", position=-1) elif request.param == "dynamic": A = shell.interface( @@ -324,7 +393,7 @@ class Outputs: type=File, help_string="path of output file", argstr="", - file_template="{x}_out", + path_template="{x}_out", ), }, name="A", @@ -335,7 +404,7 @@ class Outputs: return A -def test_shell_output_file_template(A): +def test_shell_output_path_template(A): assert "y" in [a.name for a in attrs.fields(A.Outputs)] @@ -351,16 +420,23 @@ class A: class Outputs: y: File = shell.outarg( help_string="the output file", - file_template="{x}_out", + path_template="{x}_out", argstr="", position=-1, ) assert sorted([a.name for a in attrs.fields(A)]) == ["executable", "x", "y"] assert [a.name for a in attrs.fields(A.Outputs)] == ["y"] - inputs = sorted(list_fields(A), key=attrgetter("name")) - outputs = 
sorted(list_fields(A.Outputs), key=attrgetter("name")) - assert inputs == [ + output = shell.outarg( + name="y", + type=File, + help_string="the output file", + path_template="{x}_out", + default=True, + argstr="", + position=-1, + ) + assert sorted(list_fields(A), key=pos_key) == [ shell.arg( name="executable", default="cp", @@ -375,25 +451,9 @@ class Outputs: argstr="", position=1, ), - shell.outarg( - name="y", - type=File, - help_string="the output file", - file_template="{x}_out", - argstr="", - position=-1, - ), - ] - assert outputs == [ - shell.outarg( - name="y", - type=File, - help_string="the output file", - file_template="{x}_out", - argstr="", - position=-1, - ) + output, ] + assert list_fields(A.Outputs) == [output] def test_shell_output_field_name_dynamic(): @@ -413,8 +473,8 @@ def test_shell_output_field_name_dynamic(): type=File, help_string="path of output file", argstr="", - template_field="y_out", - file_template="{x}_out", + path_template="{x}_out", + default=True, ), }, ) @@ -461,22 +521,30 @@ def test_shell_bases_static(A, tmp_path): @shell.interface class B(A): - y: File + y: text.Plain = shell.arg() # Override the output arg in A class Outputs: - out_file_size: int = shell.out( - help_string="size of the output directory", callable=get_file_size - ) + """ + Args: + out_file_size: size of the output directory + """ + + out_file_size: int = shell.out(callable=get_file_size) xpath = tmp_path / "x.txt" ypath = tmp_path / "y.txt" Path.touch(xpath) - Path.touch(ypath) + ypath.write_text("Hello, World!") - b = B(x=xpath, y=str(ypath)) + a = A(x=xpath, y=ypath) + assert a.x == File(xpath) + assert a.y == ypath + b = B(x=xpath, y=str(ypath)) assert b.x == File(xpath) - assert b.y == File(ypath) + # We have overridden the type of y from an output arg with a path_template so it + # gets coerced to a text.Plain object + assert b.y == text.Plain(ypath) # result = b() # assert result.output.y == str(ypath) @@ -516,16 +584,15 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): bases=[A], ) - hidden = File.sample(tmp_path, stem=".hidden") - b = B(directory=tmp_path, hidden=True) assert b.directory == Directory(tmp_path) assert b.hidden + # File.sample(tmp_path, stem=".hidden-file") # result = b() # assert result.runner.cmdline == f"ls -a {tmp_path}" - # assert result.output.entries == [".", "..", ".hidden"] + # assert result.output.entries == [".", "..", ".hidden-file"] def test_shell_inputs_outputs_bases_static(tmp_path): @@ -607,10 +674,7 @@ def list_entries(stdout): return stdout.split("\n")[:-1] -inp_sort_key = attrgetter("position") - - -def out_sort_key(out: shell.out) -> int: +def pos_key(out: shell.out) -> int: LARGE_NUMBER = 1000000 try: pos = out.position diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 01b1d2f5f8..4276cf411a 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -15,7 +15,7 @@ MultiOutputObj, ) from pydra.utils import add_exc_note -from fileformats import field +from fileformats import field, core try: from typing import get_origin, get_args @@ -988,3 +988,21 @@ def label_str(self): get_origin = staticmethod(get_origin) get_args = staticmethod(get_args) + + +def is_union(type_: type) -> bool: + return ty.get_origin(type_) in UNION_TYPES + + +def is_optional(type_: type) -> bool: + """Check if the type is Optional""" + if is_union(type_): + return any(a is type(None) or is_optional(a) for a in ty.get_args(type_)) + return False + + +def is_fileset_or_union(type_: type) -> bool: + """Check if the type is a FileSet or a 
Union containing a FileSet""" + if is_union(type_): + return any(is_fileset_or_union(t) for t in ty.get_args(type_)) + return issubclass(type_, core.FileSet) From e6064591c3e5f3d3524e54cbea2ce0aaadc6700d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 22:31:55 +1100 Subject: [PATCH 029/342] all python and shell design tests pass --- pydra/design/shell.py | 99 ++++++++++++----------- pydra/design/tests/test_shell.py | 132 +++++++++++++++++-------------- 2 files changed, 126 insertions(+), 105 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 76430e7a50..2a2665acbb 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -7,8 +7,9 @@ import inspect from copy import copy import attrs +import builtins from fileformats.core import from_mime -from fileformats import generic, field +from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError from .base import ( Arg, @@ -20,6 +21,7 @@ make_interface, EMPTY, ) +from pydra.engine.specs import MultiInputObj from pydra.engine.task import ShellCommandTask @@ -92,6 +94,7 @@ def _validate_sep(self, attribute, value): if ( value is not None and self.type is not ty.Any + and ty.get_origin(self.type) is not MultiInputObj and not issubclass(self.type, ty.Iterable) ): raise ValueError( @@ -382,14 +385,16 @@ def parse_command_line_template( opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) - bool_arg_re = re.compile(f"({opt_pattern})({arg_pattern})") + bool_arg_re = re.compile(f"({opt_pattern}){arg_pattern}") arguments = [] option = None - def merge_or_create_field(name, field_type, kwds): + def add_arg(name, field_type, kwds, is_option=False): """Merge the typing information with an existing field if it exists""" - if isinstance(field_type, out): + if is_option and kwds["type"] is not bool: + kwds["type"] |= None + if issubclass(field_type, Out): dct = outputs else: dct = inputs @@ -400,6 +405,9 @@ def merge_or_create_field(name, field_type, kwds): else: if isinstance(field, dict): field = field_type(**field) + elif isinstance(field, type) or ty.get_origin(field): + kwds["type"] = field + field = field_type(name=name, **kwds) elif not isinstance(field, field_type): # If field type is outarg not out field = field_type(**attrs.asdict(field)) field.name = name @@ -408,30 +416,31 @@ def merge_or_create_field(name, field_type, kwds): field.type = type_ for k, v in kwds.items(): setattr(field, k, v) - return field - - def add_option(opt): - name, field_type, kwds = opt - if kwds["type"] is not bool: - kwds["type"] |= None - arguments.append(merge_or_create_field(name, field_type, type_)) + dct[name] = field + arguments.append(field) def from_type_str(type_str) -> type: types = [] for tp in type_str.split(","): if "/" in tp: type_ = from_mime(tp) + elif tp == "...": + type_ = "..." 
else: - try: - type_ = from_mime(f"field/{tp}") - except FormatRecognitionError: + if tp in ("int", "float", "str", "bool"): + type_ = getattr(builtins, tp) + else: try: type_ = from_mime(f"generic/{tp}") except FormatRecognitionError: - raise ValueError(f"Unknown type {tp}") + raise ValueError( + f"Found unknown type, {tp!r}, in command template: {template!r}" + ) types.append(type_) - if len(types) > 1: - type_ = tuple[types] + if len(types) == 2 and types[1] == "...": + type_ = MultiInputObj[types[0]] + elif len(types) > 1: + type_ = tuple[*types] else: type_ = types[0] return type_ @@ -449,7 +458,9 @@ def from_type_str(type_str) -> type: name, type_str = name.split(":") type_ = from_type_str(type_str) else: - type_ = generic.FsObject if option is None else field.Text + type_ = generic.FsObject if option is None else str + if option is not None: + type_ |= None # Make the arguments optional kwds = {"type": type_} # If name contains a '.', treat it as a file template and strip it from the name if "." in name: @@ -462,37 +473,35 @@ def from_type_str(type_str) -> type: name = name.split(".")[0] elif field_type is outarg: kwds["path_template"] = name - + if ty.get_origin(type_) is MultiInputObj: + kwds["sep"] = " " if option is None: - arguments.append(merge_or_create_field(name, field_type, kwds)) + add_arg(name, field_type, kwds) else: - option[1].append((name, kwds)) - elif match := opt_re.match(token): - if option is not None: - add_option(option) - option = (match.group(1), field_type, []) + kwds["argstr"] = option + add_arg(name, field_type, kwds) elif match := bool_arg_re.match(token): - if option is not None: - add_option(option) - option = None - add_option(match.group(1), arg, [(match.group(2), field.Boolean)]) - if option is not None: - add_option(option) - - inferred_inputs = [] - inferred_outputs = [] - - for i, argument in enumerate(arguments, start=1): - argument.position = i - if isinstance(argument, outarg): - inferred_outputs.append(argument) + argstr, var = match.groups() + add_arg(var, arg, {"type": bool, "argstr": argstr, "default": False}) + elif match := opt_re.match(token): + option = token else: - inferred_inputs.append(argument) + raise ValueError( + f"Found unknown token '{token}' in command line template: {template}" + ) + + remaining_pos = remaining_positions(arguments, len(arguments) + 1, 1) + + for argument in arguments: + if argument.position is None: + argument.position = remaining_pos.pop(0) - return executable, inferred_inputs, inferred_outputs + return executable, list(inputs.values()), list(outputs.values()) -def remaining_positions(args: list[Arg], num_args: int | None = None) -> ty.List[int]: +def remaining_positions( + args: list[Arg], num_args: int | None = None, start: int = 0 +) -> ty.List[int]: """Get the remaining positions for input fields Parameters @@ -523,9 +532,11 @@ def remaining_positions(args: list[Arg], num_args: int | None = None) -> ty.List else: positions[num_args + arg.position].append(arg) if multiple_positions := { - k: f"{v.name}({v.position})" for k, v in positions.items() if len(v) > 1 + k: [f"{a.name}({a.position})" for a in v] + for k, v in positions.items() + if len(v) > 1 }: raise ValueError( f"Multiple fields have the overlapping positions: {multiple_positions}" ) - return [i for i in range(num_args) if i not in positions] + return [i for i in range(start, num_args) if i not in positions] diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index fbc2888887..d24480d8dd 100644 --- 
a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -4,8 +4,9 @@ import pytest import cloudpickle as cp from pydra.design import shell, Interface, list_fields -from fileformats.generic import File, Directory, FsObject, SetOf +from fileformats.generic import File, Directory, FsObject from fileformats import field, text, image +from pydra.engine.specs import MultiInputObj def test_interface_template(): @@ -20,12 +21,12 @@ def test_interface_template(): type=FsObject, position=2, ) - assert list_fields(SampleInterface) == [ + assert sorted_fields(SampleInterface) == [ shell.arg(name="executable", default="cp", type=str, position=0), shell.arg(name="in_path", type=FsObject, position=1), output, ] - assert list_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [output] intf = SampleInterface(in_path=File.mock("in-path.txt")) assert intf.executable == "cp" SampleInterface(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) @@ -46,12 +47,12 @@ def test_interface_template_w_types_and_path_template_ext(): type=image.Png, position=2, ) - assert list_fields(SampleInterface) == [ + assert sorted_fields(SampleInterface) == [ shell.arg(name="executable", default="trim-png", type=str, position=0), shell.arg(name="in_image", type=image.Png, position=1), output, ] - assert list_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [output] SampleInterface(in_image=image.Png.mock()) SampleInterface(in_image=image.Png.mock(), out_image=Path("./new_image.png")) SampleInterface.Outputs(out_image=image.Png.mock()) @@ -61,10 +62,11 @@ def test_interface_template_more_complex(): SampleInterface = shell.interface( ( - "cp " + "cp " "-R " - "--text-arg --int-arg " - "--tuple-arg " + "--text-arg " + "--int-arg " + "--tuple-arg " ), ) @@ -76,27 +78,25 @@ def test_interface_template_more_complex(): position=2, default=True, ) - assert sorted(list_fields(SampleInterface), key=pos_key) == [ + assert sorted_fields(SampleInterface) == [ shell.arg(name="executable", default="cp", type=str, position=0), - shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), - output, - shell.arg(name="recursive", arg_str="-R", type=bool, position=3), - shell.arg( - name="text_arg", arg_str="--text-arg", type=field.Text | None, position=5 - ), shell.arg( - name="int_arg", arg_str="--int-arg", type=field.Integer | None, position=6 + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=4), + shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=5), shell.arg( name="tuple_arg", - arg_str="--tuple-arg", - type=tuple[field.Integer, field.Text] | None, + argstr="--tuple-arg", + type=tuple[int, str] | None, position=6, ), ] - assert list_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [output] SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) - SampleInterface.Outputs(out_path=File.sample()) + SampleInterface.Outputs(out_dir=Directory.sample()) def test_interface_template_with_overrides(): @@ -108,10 +108,11 @@ def test_interface_template_with_overrides(): SampleInterface = shell.interface( ( - "cp " + "cp " "-R " - "--text-arg --int-arg " - "--tuple-arg " + "--text-arg " + "--int-arg " + "--tuple-arg " ), inputs={"recursive": 
shell.arg(help_string=RECURSIVE_HELP)}, outputs={"out_dir": shell.outarg(position=-1)}, @@ -125,43 +126,43 @@ def test_interface_template_with_overrides(): position=-1, default=True, ) - assert list_fields(SampleInterface) == [ + assert sorted_fields(SampleInterface) == [ shell.arg(name="executable", default="cp", type=str, position=0), - shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), + shell.arg( + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + ), shell.arg( name="recursive", - arg_str="-R", + argstr="-R", type=bool, + default=False, help_string=RECURSIVE_HELP, position=2, ), - shell.arg( - name="text_arg", argstr="--text-arg", type=field.Text | None, position=3 - ), - shell.arg( - name="int_arg", argstr="--int-arg", type=field.Integer | None, position=4 - ), + shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=3), + shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=4), shell.arg( name="tuple_arg", argstr="--tuple-arg", - type=tuple[field.Integer, field.Text] | None, + type=tuple[int, str] | None, position=5, ), output, ] - assert list_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [output] def test_interface_template_with_type_overrides(): SampleInterface = shell.interface( ( - "cp " + "cp " "-R " - "--text-arg --int-arg " - "--tuple-arg " + "--text-arg " + "--int-arg " + "--tuple-arg " ), - inputs={"text_arg": str | None, "int_arg": int | None}, + inputs={"text_arg": str, "int_arg": int | None}, ) assert issubclass(SampleInterface, Interface) @@ -169,23 +170,26 @@ def test_interface_template_with_type_overrides(): name="out_dir", type=Directory, path_template="out_dir", - position=-1, + position=2, default=True, ) - assert list_fields(SampleInterface) == [ - shell.arg(name="in_fs_objects", type=SetOf[FsObject], position=1, sep=" "), - shell.arg(name="recursive", argstr="-R", type=bool, position=2), - shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=4), - shell.arg(name="int_arg", argstr="--text-arg", type=int | None, position=5), + assert sorted_fields(SampleInterface) == [ + shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg( + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + ), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), + shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=5), shell.arg( name="tuple_arg", - targstr="--text-arg", - ype=tuple[field.Integer, field.Text] | None, + argstr="--tuple-arg", + type=tuple[int, str] | None, position=6, ), - output, ] - assert list_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [output] @pytest.fixture(params=["static", "dynamic"]) @@ -302,7 +306,7 @@ class Outputs: def test_shell_fields(Ls): - assert sorted([a.name for a in list_fields(Ls)]) == sorted( + assert sorted([a.name for a in sorted_fields(Ls)]) == sorted( [ "executable", "directory", @@ -314,7 +318,7 @@ def test_shell_fields(Ls): ] ) - assert [a.name for a in list_fields(Ls.Outputs)] == ["entries"] + assert [a.name for a in sorted_fields(Ls.Outputs)] == ["entries"] def test_shell_pickle_roundtrip(Ls, tmp_path): @@ -436,7 +440,7 @@ class Outputs: argstr="", position=-1, ) - assert sorted(list_fields(A), key=pos_key) == [ + assert sorted_fields(A) == [ shell.arg( 
name="executable", default="cp", @@ -453,7 +457,7 @@ class Outputs: ), output, ] - assert list_fields(A.Outputs) == [output] + assert sorted_fields(A.Outputs) == [output] def test_shell_output_field_name_dynamic(): @@ -674,13 +678,19 @@ def list_entries(stdout): return stdout.split("\n")[:-1] -def pos_key(out: shell.out) -> int: - LARGE_NUMBER = 1000000 - try: - pos = out.position - except AttributeError: - pos = LARGE_NUMBER - else: +def sorted_fields(interface): + fields = list_fields(interface) + length = len(fields) + + def pos_key(out: shell.out) -> int: + try: + pos = out.position + except AttributeError: + return (length, out.name) if pos < 0: - pos = LARGE_NUMBER + pos - return pos + key = length + pos + else: + key = pos + return (key, out.name) + + return sorted(fields, key=pos_key) From b38ced60914aaaa447a95082cba286bf89f35672 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 22:43:04 +1100 Subject: [PATCH 030/342] added initialization tests to test_python --- pydra/design/tests/test_python.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 2b08231932..1e574086a8 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -26,6 +26,10 @@ def func(a: int) -> float: python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="out", type=float)] + SampleInterface(a=1) + SampleInterface.Outputs(out=2.0) + with pytest.raises(TypeError): + SampleInterface(a=1.5) def test_interface_wrap_function_with_default(): @@ -44,6 +48,9 @@ def func(a: int, k: float = 2.0) -> float: python.arg(name="k", type=float, default=2.0), ] assert outputs == [python.out(name="out", type=float)] + SampleInterface(a=1) + SampleInterface(a=10, k=3.0) + SampleInterface.Outputs(out=2.0) def test_interface_wrap_function_overrides(): @@ -67,6 +74,8 @@ def func(a: int) -> float: assert outputs == [ python.out(name="b", type=Decimal, help_string="the doubled output"), ] + outputs = SampleInterface.Outputs(b=Decimal(2.0)) + assert isinstance(outputs.b, Decimal) def test_interface_wrap_function_types(): @@ -88,6 +97,10 @@ def func(a: int) -> int: python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="b", type=float)] + intf = SampleInterface(a=1) + assert isinstance(intf.a, float) + outputs = SampleInterface.Outputs(b=2.0) + assert isinstance(outputs.b, float) def test_decorated_function_interface(): @@ -114,6 +127,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="d", type=float), ] assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" + SampleInterface.Outputs(c=1.0, d=2.0) def test_interface_with_function_implicit_outputs_from_return_stmt(): @@ -141,6 +155,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: python.out(name="d", type=float), ] assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" + SampleInterface.Outputs(c=1.0, d=2.0) def test_interface_with_function_docstr(): @@ -298,6 +313,9 @@ def function(a, b): python.out(name="d", type=float, help_string="Product of a and b"), ] assert SampleInterface.function.__name__ == "function" + SampleInterface(a=1) + SampleInterface(a=1, b=2.0) + SampleInterface.Outputs(c=1.0, d=2.0) def test_interface_with_inheritance(): @@ -366,6 +384,12 @@ def function(a, b): python.out(name="d", type=float, help_string="Product of a and b"), ] assert 
SampleInterface.function.__name__ == "function" + SampleInterface(a=1, b=2.0) + SampleInterface.Outputs(c=1.0, d=2.0) + with pytest.raises(TypeError): + SampleInterface(a=1, b=2.0, x=3) + with pytest.raises(TypeError): + SampleInterface.Outputs(c=1.0, d=2.0, y="hello") def test_interface_invalid_wrapped1(): From a9356d1b40459b0a86f8516e32b5c80d58c32514 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 16 Nov 2024 22:47:06 +1100 Subject: [PATCH 031/342] renamed Interface->TaskSpec and interface->define --- pydra/design/__init__.py | 4 +-- pydra/design/base.py | 16 ++++----- pydra/design/python.py | 14 ++++---- pydra/design/shell.py | 12 +++---- pydra/design/tests/test_python.py | 50 ++++++++++++++-------------- pydra/design/tests/test_shell.py | 54 +++++++++++++++---------------- 6 files changed, 75 insertions(+), 75 deletions(-) diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py index 69f789ca66..9b86627949 100644 --- a/pydra/design/__init__.py +++ b/pydra/design/__init__.py @@ -1,6 +1,6 @@ -from .base import Interface, list_fields +from .base import TaskSpec, list_fields from . import python from . import shell -__all__ = ["Interface", "list_fields", "python", "shell"] +__all__ = ["TaskSpec", "list_fields", "python", "shell"] diff --git a/pydra/design/base.py b/pydra/design/base.py index 1ca7aa813c..76782b1e16 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -23,9 +23,9 @@ "Field", "Arg", "Out", - "Interface", + "TaskSpec", "collate_fields", - "make_interface", + "make_task_spec", "list_fields", ] @@ -131,7 +131,7 @@ class Out(Field): OutputType = ty.TypeVar("OutputType") -class Interface(ty.Generic[OutputType]): +class TaskSpec(ty.Generic[OutputType]): Task: ty.Type[Task] @@ -318,7 +318,7 @@ def _get_default(field: Field) -> ty.Any: return attrs.NOTHING -def make_interface( +def make_task_spec( task_type: type[Task], inputs: list[Arg], outputs: list[Out], @@ -346,12 +346,12 @@ def make_interface( outputs_klass.__annotations__.update((o.name, o.type) for o in outputs) outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) - if klass is None or not issubclass(klass, Interface): + if klass is None or not issubclass(klass, TaskSpec): if name is None: raise ValueError("name must be provided if klass is not") bases = tuple(bases) - if not any(issubclass(b, Interface) for b in bases): - bases = bases + (Interface,) + if not any(issubclass(b, TaskSpec) for b in bases): + bases = bases + (TaskSpec,) if klass is not None: bases += tuple(c for c in klass.__mro__ if c not in bases + (object,)) klass = types.new_class( @@ -635,7 +635,7 @@ def split_block(string: str) -> ty.Generator[str, None, None]: yield block.strip() -def list_fields(interface: Interface) -> list[Field]: +def list_fields(interface: TaskSpec) -> list[Field]: if not attrs.has(interface): return [] return [ diff --git a/pydra/design/python.py b/pydra/design/python.py index a1f7a6d89f..798821296b 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -6,8 +6,8 @@ Arg, Out, collate_fields, - make_interface, - Interface, + make_task_spec, + TaskSpec, parse_doc_string, extract_inputs_and_outputs_from_function, check_explicit_fields_are_none, @@ -15,7 +15,7 @@ ) -__all__ = ["arg", "out", "interface"] +__all__ = ["arg", "out", "define"] @attrs.define @@ -28,7 +28,7 @@ class out(Out): pass -def interface( +def define( wrapped: type | ty.Callable | None = None, /, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, @@ -36,7 +36,7 @@ def interface( bases: 
ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, -) -> Interface: +) -> TaskSpec: """ Create an interface for a function or a class. @@ -52,7 +52,7 @@ def interface( Whether to use auto_attribs mode when creating the class. """ - def make(wrapped: ty.Callable | type) -> Interface: + def make(wrapped: ty.Callable | type) -> TaskSpec: if inspect.isclass(wrapped): klass = wrapped function = klass.function @@ -89,7 +89,7 @@ def make(wrapped: ty.Callable | type) -> Interface: pass parsed_inputs.append(arg(name="function", type=ty.Callable, default=function)) - interface = make_interface( + interface = make_task_spec( FunctionTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 2a2665acbb..4678b91ca4 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -17,8 +17,8 @@ check_explicit_fields_are_none, get_fields_from_class, collate_fields, - Interface, - make_interface, + TaskSpec, + make_task_spec, EMPTY, ) from pydra.engine.specs import MultiInputObj @@ -191,7 +191,7 @@ def _validate_path_template(self, attribute, value): ) -def interface( +def define( wrapped: type | str | None = None, /, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, @@ -200,7 +200,7 @@ def interface( outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, name: str | None = None, -) -> Interface: +) -> TaskSpec: """Create a shell command interface Parameters @@ -222,7 +222,7 @@ def interface( def make( wrapped: ty.Callable | type | None = None, - ) -> Interface: + ) -> TaskSpec: if inspect.isclass(wrapped): klass = wrapped @@ -284,7 +284,7 @@ def make( inpt.position = position_stack.pop() parsed_inputs.sort(key=lambda x: x.position) - interface = make_interface( + interface = make_task_spec( ShellCommandTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 1e574086a8..8939539d58 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -3,7 +3,7 @@ from decimal import Decimal import attrs import pytest -from pydra.design import list_fields, Interface +from pydra.design import list_fields, TaskSpec from pydra.design import python from pydra.engine.task import FunctionTask @@ -16,9 +16,9 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.interface(func) + SampleInterface = python.define(func) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -37,9 +37,9 @@ def func(a: int, k: float = 2.0) -> float: """Sample function with inputs and outputs""" return a * k - SampleInterface = python.interface(func) + SampleInterface = python.define(func) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -58,13 +58,13 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.interface( + SampleInterface = python.define( func, inputs={"a": python.arg(help_string="The argument to be doubled")}, outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, ) - assert issubclass(SampleInterface, 
Interface) + assert issubclass(SampleInterface, TaskSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -83,13 +83,13 @@ def func(a: int) -> int: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.interface( + SampleInterface = python.define( func, inputs={"a": float}, outputs={"b": float}, ) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -104,12 +104,12 @@ def func(a: int) -> int: def test_decorated_function_interface(): - @python.interface(outputs=["c", "d"]) + @python.define(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" return a + b, a * b - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) assert SampleInterface.Task is FunctionTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) @@ -131,7 +131,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: def test_interface_with_function_implicit_outputs_from_return_stmt(): - @python.interface + @python.define def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" c = a + b @@ -159,7 +159,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: def test_interface_with_function_docstr(): - @python.interface(outputs=["c", "d"]) + @python.define(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing @@ -190,7 +190,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: def test_interface_with_function_google_docstr(): - @python.interface(outputs=["c", "d"]) + @python.define(outputs=["c", "d"]) def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing @@ -225,7 +225,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: def test_interface_with_function_numpy_docstr(): - @python.interface( + @python.define( outputs=["c", "d"] ) # Could potentiall read output names from doc-string instead def SampleInterface(a: int, b: float) -> tuple[float, float]: @@ -268,7 +268,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: def test_interface_with_class(): - @python.interface + @python.define class SampleInterface: """Sample class for testing @@ -295,7 +295,7 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) assert SampleInterface.Task is FunctionTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) @@ -319,8 +319,8 @@ def function(a, b): def test_interface_with_inheritance(): - @python.interface - class SampleInterface(Interface["SampleInterface.Outputs"]): + @python.define + class SampleInterface(TaskSpec["SampleInterface.Outputs"]): """Sample class for testing Args: @@ -346,11 +346,11 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) def test_interface_with_class_no_auto_attribs(): - @python.interface(auto_attribs=False) + @python.define(auto_attribs=False) 
class SampleInterface: a: int = python.arg(help_string="First input to be inputted") b: float = python.arg(help_string="Second input") @@ -395,8 +395,8 @@ def function(a, b): def test_interface_invalid_wrapped1(): with pytest.raises(ValueError): - @python.interface(inputs={"a": python.arg()}) - class SampleInterface(Interface["SampleInterface.Outputs"]): + @python.define(inputs={"a": python.arg()}) + class SampleInterface(TaskSpec["SampleInterface.Outputs"]): a: int class Outputs: @@ -410,8 +410,8 @@ def function(a): def test_interface_invalid_wrapped2(): with pytest.raises(ValueError): - @python.interface(outputs={"b": python.out()}) - class SampleInterface(Interface["SampleInterface.Outputs"]): + @python.define(outputs={"b": python.out()}) + class SampleInterface(TaskSpec["SampleInterface.Outputs"]): a: int class Outputs: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index d24480d8dd..4dedb90e23 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -3,17 +3,17 @@ import attrs import pytest import cloudpickle as cp -from pydra.design import shell, Interface, list_fields +from pydra.design import shell, TaskSpec, list_fields from fileformats.generic import File, Directory, FsObject -from fileformats import field, text, image +from fileformats import text, image from pydra.engine.specs import MultiInputObj def test_interface_template(): - SampleInterface = shell.interface("cp ") + SampleInterface = shell.define("cp ") - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) output = shell.outarg( name="out_path", path_template="out_path", @@ -35,11 +35,11 @@ def test_interface_template(): def test_interface_template_w_types_and_path_template_ext(): - SampleInterface = shell.interface( + SampleInterface = shell.define( "trim-png " ) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) output = shell.outarg( name="out_image", path_template="out_image.png", @@ -60,7 +60,7 @@ def test_interface_template_w_types_and_path_template_ext(): def test_interface_template_more_complex(): - SampleInterface = shell.interface( + SampleInterface = shell.define( ( "cp " "-R " @@ -70,7 +70,7 @@ def test_interface_template_more_complex(): ), ) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) output = shell.outarg( name="out_dir", type=Directory, @@ -106,7 +106,7 @@ def test_interface_template_with_overrides(): "subtree connected at that point." 
) - SampleInterface = shell.interface( + SampleInterface = shell.define( ( "cp " "-R " @@ -118,7 +118,7 @@ def test_interface_template_with_overrides(): outputs={"out_dir": shell.outarg(position=-1)}, ) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) output = shell.outarg( name="out_dir", type=Directory, @@ -154,7 +154,7 @@ def test_interface_template_with_overrides(): def test_interface_template_with_type_overrides(): - SampleInterface = shell.interface( + SampleInterface = shell.define( ( "cp " "-R " @@ -165,7 +165,7 @@ def test_interface_template_with_type_overrides(): inputs={"text_arg": str, "int_arg": int | None}, ) - assert issubclass(SampleInterface, Interface) + assert issubclass(SampleInterface, TaskSpec) output = shell.outarg( name="out_dir", type=Directory, @@ -196,8 +196,8 @@ def test_interface_template_with_type_overrides(): def Ls(request): if request.param == "static": - @shell.interface - class Ls(Interface["Ls.Outputs"]): + @shell.define + class Ls(TaskSpec["Ls.Outputs"]): executable = "ls" directory: Directory = shell.arg( @@ -246,7 +246,7 @@ class Outputs: ) elif request.param == "dynamic": - Ls = shell.interface( + Ls = shell.define( "ls", inputs={ "directory": shell.arg( @@ -357,7 +357,7 @@ def test_shell_run(Ls, tmp_path): def A(request): if request.param == "static": - @shell.interface + @shell.define class A: """An example shell interface described in a class @@ -382,7 +382,7 @@ class Outputs: y: File = shell.outarg(path_template="{x}_out", position=-1) elif request.param == "dynamic": - A = shell.interface( + A = shell.define( "cp", inputs={ "x": shell.arg( @@ -413,7 +413,7 @@ def test_shell_output_path_template(A): def test_shell_output_field_name_static(): - @shell.interface + @shell.define class A: """Copy a file""" @@ -461,7 +461,7 @@ class Outputs: def test_shell_output_field_name_dynamic(): - A = shell.interface( + A = shell.define( "cp", name="A", inputs={ @@ -492,7 +492,7 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmp_path): - B = shell.interface( + B = shell.define( name="B", inputs={ "y": shell.arg(type=File, help_string="output file", argstr="", position=-1) @@ -522,7 +522,7 @@ def test_shell_bases_dynamic(A, tmp_path): def test_shell_bases_static(A, tmp_path): - @shell.interface + @shell.define class B(A): y: text.Plain = shell.arg() # Override the output arg in A @@ -555,7 +555,7 @@ class Outputs: def test_shell_inputs_outputs_bases_dynamic(tmp_path): - A = shell.interface( + A = shell.define( "ls", name="A", inputs={ @@ -574,7 +574,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): ) }, ) - B = shell.interface( + B = shell.define( "ls", name="B", inputs={ @@ -600,7 +600,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): def test_shell_inputs_outputs_bases_static(tmp_path): - @shell.interface + @shell.define class A: executable = "ls" @@ -614,7 +614,7 @@ class Outputs: callable=list_entries, ) - @shell.interface + @shell.define class B(A): hidden: bool = shell.arg( help_string="show hidden files", @@ -636,7 +636,7 @@ class B(A): def test_shell_missing_executable_static(): with pytest.raises(AttributeError, match="must have an `executable` attribute"): - @shell.interface + @shell.define class A: directory: Directory = shell.arg( help_string="input directory", argstr="", position=-1 @@ -654,7 +654,7 @@ def test_shell_missing_executable_dynamic(): ValueError, match=r"name \('A'\) can only be provided when creating a class dynamically", ): - shell.interface( + 
shell.define( name="A", inputs={ "directory": shell.arg( From b9594ee79dd7540be29d174530f0e5dcc6ade063 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 17 Nov 2024 13:30:41 +1100 Subject: [PATCH 032/342] converted to inputs and outputs to dicts from lists within design make functions --- pydra/design/base.py | 75 +++++++++++++++++++----------------------- pydra/design/python.py | 8 ++--- pydra/design/shell.py | 25 +++++++------- 3 files changed, 51 insertions(+), 57 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 76782b1e16..9366e5d13b 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -169,22 +169,23 @@ def collate_fields( arg_type: type[Arg], out_type: type[Out], doc_string: str | None = None, - inputs: list[str | Arg] | dict[str, Arg | type] | None = None, - outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + inputs: dict[str, Arg | type] | None = None, + outputs: dict[str, Out | type] | None = None, input_helps: dict[str, str] | None = None, output_helps: dict[str, str] | None = None, -) -> tuple[list[Arg], list[Out]]: +) -> tuple[dict[str, Arg], dict[str, Out]]: if inputs is None: - inputs = [] + inputs = {} elif isinstance(inputs, list): - inputs = [ - a if isinstance(a, Arg) else arg_type(a, help_string=input_helps.get(a, "")) - for a in inputs - ] + inputs_dict = {} + for inpt in inputs: + if not isinstance(inpt, Arg): + inpt = arg_type(inpt, help_string=input_helps.get(inpt, "")) + inputs_dict[inpt.name] = inpt + inputs = inputs_dict elif isinstance(inputs, dict): - inputs_list = [] - for input_name, arg in inputs.items(): + for input_name, arg in list(inputs.items()): if isinstance(arg, Arg): if arg.name is None: arg.name = input_name @@ -199,27 +200,23 @@ def collate_fields( if not arg.help_string: arg.help_string = input_helps.get(input_name, "") else: - arg = arg_type( + inputs[input_name] = arg_type( type=arg, name=input_name, help_string=input_helps.get(input_name, ""), ) - inputs_list.append(arg) - inputs = inputs_list if outputs is None: - outputs = [] + outputs = {} elif isinstance(outputs, list): - outputs = [ - ( - o - if isinstance(o, Out) - else out_type(name=o, type=ty.Any, help_string=output_helps.get(o, "")) - ) - for o in outputs - ] + outputs_dict = {} + for out in outputs: + if not isinstance(out, Out): + out = out_type(out, type=ty.Any, help_string=output_helps.get(out, "")) + outputs_dict[out.name] = out + outputs = outputs_dict elif isinstance(outputs, dict): - for output_name, out in outputs.items(): + for output_name, out in list(outputs.items()): if isinstance(out, Out): if out.name is None: out.name = output_name @@ -233,14 +230,12 @@ def collate_fields( out.name = output_name if not out.help_string: out.help_string = output_helps.get(output_name, "") - outputs = [ - ( - o - if isinstance(o, Out) - else out_type(name=n, type=o, help_string=output_helps.get(n, "")) - ) - for n, o in outputs.items() - ] + else: + outputs[output_name] = out_type( + type=out, + name=output_name, + help_string=output_helps.get(output_name, ""), + ) return inputs, outputs @@ -250,12 +245,12 @@ def get_fields_from_class( arg_type: type[Arg], out_type: type[Out], auto_attribs: bool, -) -> tuple[list[Field], list[Field]]: +) -> tuple[dict[str, Arg], dict[str, Out]]: """Parse the input and output fields from a class""" input_helps, _ = parse_doc_string(klass.__doc__) - def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: + def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: 
"""Get the fields from a class""" fields_dict = {} # Get fields defined in base classes if present @@ -292,7 +287,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> list[Field]: fields_dict[atr_name] = field_type( name=atr_name, type=type_, help_string=helps.get(atr_name, "") ) - return list(fields_dict.values()) + return fields_dict inputs = get_fields(klass, arg_type, auto_attribs, input_helps) @@ -320,15 +315,13 @@ def _get_default(field: Field) -> ty.Any: def make_task_spec( task_type: type[Task], - inputs: list[Arg], - outputs: list[Out], + inputs: dict[str, Arg], + outputs: dict[str, Out], klass: type | None = None, name: str | None = None, bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), ): - assert isinstance(inputs, list) - assert isinstance(outputs, list) if name is None and klass is not None: name = klass.__name__ outputs_klass = type( @@ -340,10 +333,10 @@ def make_task_spec( metadata={PYDRA_ATTR_METADATA: o}, default=_get_default(o), ) - for o in outputs + for o in outputs.values() }, ) - outputs_klass.__annotations__.update((o.name, o.type) for o in outputs) + outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) if klass is None or not issubclass(klass, TaskSpec): @@ -369,7 +362,7 @@ def make_task_spec( klass.Task = task_type klass.Outputs = outputs_klass # Now that we have saved the attributes in lists to be - for arg in inputs: + for arg in inputs.values(): # If an outarg input then the field type should be Path not a FileSet if isinstance(arg, Out) and is_fileset_or_union(arg.type): if getattr(arg, "path_template", False): diff --git a/pydra/design/python.py b/pydra/design/python.py index 798821296b..658f80abc4 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -83,11 +83,9 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: output_helps=output_helps, ) - try: - parsed_inputs.remove(next(i for i in parsed_inputs if i.name == "function")) - except StopIteration: - pass - parsed_inputs.append(arg(name="function", type=ty.Callable, default=function)) + parsed_inputs["function"] = arg( + name="function", type=ty.Callable, default=function + ) interface = make_task_spec( FunctionTask, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 4678b91ca4..040599139c 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -24,6 +24,8 @@ from pydra.engine.specs import MultiInputObj from pydra.engine.task import ShellCommandTask +__all__ = ["arg", "out", "outarg", "define"] + @attrs.define(kw_only=True) class arg(Arg): @@ -270,19 +272,18 @@ def make( # Update the inputs (overriding inputs from base classes) with the executable # and the output argument fields - inputs_dict = {i.name: i for i in parsed_inputs} - inputs_dict.update({o.name: o for o in parsed_outputs if isinstance(o, arg)}) - inputs_dict["executable"] = arg( + parsed_inputs.update( + {o.name: o for o in parsed_outputs.values() if isinstance(o, arg)} + ) + parsed_inputs["executable"] = arg( name="executable", type=str, argstr="", position=0, default=executable ) - parsed_inputs = list(inputs_dict.values()) # Set positions for the remaining inputs that don't have an explicit position - position_stack = list(reversed(remaining_positions(parsed_inputs))) - for inpt in parsed_inputs: + position_stack = remaining_positions(list(parsed_inputs.values())) + for inpt in parsed_inputs.values(): if inpt.position is None: - inpt.position = position_stack.pop() 
- parsed_inputs.sort(key=lambda x: x.position) + inpt.position = position_stack.pop(0) interface = make_task_spec( ShellCommandTask, @@ -326,7 +327,7 @@ def make( def parse_command_line_template( template: str, inputs: list[str | Arg] | dict[str, Arg | type] | None = None, - outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + outputs: list[str | Out] | dict[str, Out | type] | None = None, ) -> ty.Tuple[str, dict[str, Arg | type], dict[str, Out | type]]: """Parses a command line template into a name and input and output fields. Fields are inferred from the template if not provided, where inputs are specified with `` @@ -369,12 +370,14 @@ def parse_command_line_template( elif isinstance(inputs, dict): inputs = copy(inputs) # We don't want to modify the original else: + assert inputs is None inputs = {} if isinstance(outputs, list): outputs = {out.name: out for out in outputs} elif isinstance(outputs, dict): outputs = copy(outputs) # We don't want to modify the original - elif not outputs: + else: + assert outputs is None outputs = {} parts = template.split(maxsplit=1) if len(parts) == 1: @@ -496,7 +499,7 @@ def from_type_str(type_str) -> type: if argument.position is None: argument.position = remaining_pos.pop(0) - return executable, list(inputs.values()), list(outputs.values()) + return executable, inputs, outputs def remaining_positions( From 91b41165b6ba4f73c0c4acd7fe518305eb1ec85c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 17 Nov 2024 13:56:19 +1100 Subject: [PATCH 033/342] added workflow design module --- pydra/design/base.py | 19 ++++ pydra/design/python.py | 51 +++++++++ pydra/design/workflow.py | 229 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+) create mode 100644 pydra/design/workflow.py diff --git a/pydra/design/base.py b/pydra/design/base.py index 9366e5d13b..3068767d21 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -125,6 +125,25 @@ class Arg(Field): @attrs.define(kw_only=True) class Out(Field): + """Base class for input and output fields to Pydra tasks + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help_string: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + pass diff --git a/pydra/design/python.py b/pydra/design/python.py index 658f80abc4..b4c61197d2 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -20,11 +20,62 @@ @attrs.define class arg(Arg): + """Argument of a Python task spec + + Parameters + ---------- + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + xor: list, optional + Names of the inputs that are mutually exclusive with the field. 
+ copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + """ + pass @attrs.define class out(Out): + """Output of a Python task spec + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help_string: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + pass diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py new file mode 100644 index 0000000000..204701911c --- /dev/null +++ b/pydra/design/workflow.py @@ -0,0 +1,229 @@ +import typing as ty +import inspect +import attrs +from pydra.engine.task import FunctionTask +from pydra.engine.core import Workflow, AuditFlag +from .base import ( + Arg, + Out, + collate_fields, + make_task_spec, + TaskSpec, + parse_doc_string, + extract_inputs_and_outputs_from_function, + check_explicit_fields_are_none, + get_fields_from_class, +) + + +__all__ = ["arg", "out", "define", "this", "add", "Node", "WorkflowSpec"] + + +@attrs.define +class arg(Arg): + """Argument of a workflow task spec + + Parameters + ---------- + help_string: str + A short description of the input field. + default : Any, optional + the default value for the argument + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + xor: list, optional + Names of the inputs that are mutually exclusive with the field. 
+ copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + type: type, optional + The type of the field, by default it is Any + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + lazy: bool, optional + If True the input field is not required at construction time but is passed straight + through to the tasks, by default it is False + """ + + lazy: bool = False + + +@attrs.define +class out(Out): + """Output of a workflow task spec + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + help_string: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + pass + + +def define( + wrapped: type | ty.Callable | None = None, + /, + inputs: list[str | Arg] | dict[str, Arg | type] | None = None, + outputs: list[str | Out] | dict[str, Out | type] | type | None = None, + bases: ty.Sequence[type] = (), + outputs_bases: ty.Sequence[type] = (), + lazy: list[str] | None = None, + auto_attribs: bool = True, +) -> TaskSpec: + """ + Create an interface for a function or a class. + + Parameters + ---------- + wrapped : type | callable | None + The function or class to create an interface for. + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function or class. + outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function or class. + auto_attribs : bool + Whether to use auto_attribs mode when creating the class. 
+ """ + if lazy is None: + lazy = [] + + def make(wrapped: ty.Callable | type) -> TaskSpec: + if inspect.isclass(wrapped): + klass = wrapped + constructor = klass.constructor + name = klass.__name__ + check_explicit_fields_are_none(klass, inputs, outputs) + parsed_inputs, parsed_outputs = get_fields_from_class( + klass, arg, out, auto_attribs + ) + else: + if not inspect.isfunction(wrapped): + raise ValueError( + f"wrapped must be a class or a function, not {wrapped!r}" + ) + klass = None + constructor = wrapped + input_helps, output_helps = parse_doc_string(constructor.__doc__) + inferred_inputs, inferred_outputs = ( + extract_inputs_and_outputs_from_function( + constructor, arg, inputs, outputs + ) + ) + name = constructor.__name__ + + parsed_inputs, parsed_outputs = collate_fields( + arg_type=arg, + out_type=out, + inputs=inferred_inputs, + outputs=inferred_outputs, + input_helps=input_helps, + output_helps=output_helps, + ) + + parsed_inputs["constructor"] = arg( + name="constructor", type=ty.Callable, default=constructor + ) + for inpt_name in lazy: + parsed_inputs[inpt_name].lazy = True + + interface = make_task_spec( + FunctionTask, + parsed_inputs, + parsed_outputs, + name=name, + klass=klass, + bases=bases, + outputs_bases=outputs_bases, + ) + + return interface + + if wrapped is not None: + if not isinstance(wrapped, (ty.Callable, type)): + raise ValueError(f"wrapped must be a class or a callable, not {wrapped!r}") + return make(wrapped) + return make + + +OutputType = ty.TypeVar("OutputType") + + +class WorkflowSpec(TaskSpec[OutputType]): + + under_construction: Workflow = None + + def __construct__( + self, + audit_flags: AuditFlag = AuditFlag.NONE, + cache_dir: ty.Any | None = None, + cache_locations: ty.Any | None = None, + cont_dim: ty.Any | None = None, + messenger_args: ty.Any | None = None, + messengers: ty.Any | None = None, + rerun: bool = False, + propagate_rerun: bool = True, + ) -> OutputType: + wf = self.under_construction = Workflow( + name=type(self).__name__, + inputs=self, + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + cont_dim=cont_dim, + messenger_args=messenger_args, + messengers=messengers, + rerun=rerun, + propagate_rerun=propagate_rerun, + ) + + try: + output_fields = self.construct(**attrs.asdict(self)) + finally: + self.under_construction = None + + wf.outputs = self.Outputs( + **dict( + zip( + (f.name for f in attrs.fields(self.Outputs)), + output_fields, + ) + ) + ) + return wf + + +def this() -> Workflow: + return WorkflowSpec.under_construction + + +def add(task_spec: TaskSpec[OutputType]) -> OutputType: + return this().add(task_spec) + + +@attrs.define +class Node: + task_spec: TaskSpec + splitter: str | list[str] | None = None From 8130802a74b6c7d7733cf6ea8dc3ef632a2c023f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 09:21:46 +1100 Subject: [PATCH 034/342] added check for symbols defined outside to the scope of the function --- pydra/design/base.py | 114 ++++++++++++++++++--------------------- pydra/design/python.py | 10 ++-- pydra/design/shell.py | 4 +- pydra/design/workflow.py | 12 ++--- pydra/utils/misc.py | 85 +++++++++++++++++++++++++++++ 5 files changed, 148 insertions(+), 77 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 3068767d21..811cc52359 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -9,6 +9,7 @@ from attrs.converters import default_if_none from fileformats.generic import File from pydra.utils.typing import TypeParser, is_optional, 
is_fileset_or_union +from pydra.utils.misc import get_undefined_symbols from pydra.engine.helpers import from_list_if_single, ensure_list from pydra.engine.specs import ( LazyField, @@ -24,7 +25,7 @@ "Arg", "Out", "TaskSpec", - "collate_fields", + "collate_with_helps", "make_task_spec", "list_fields", ] @@ -156,7 +157,6 @@ class TaskSpec(ty.Generic[OutputType]): def __call__( self, - interface, name: str | None = None, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, @@ -170,7 +170,6 @@ def __call__( ): task = self.Task( self, - interface, name=name, audit_flags=audit_flags, cache_dir=cache_dir, @@ -184,7 +183,7 @@ def __call__( return task(**kwargs) -def collate_fields( +def collate_with_helps( arg_type: type[Arg], out_type: type[Out], doc_string: str | None = None, @@ -193,68 +192,49 @@ def collate_fields( input_helps: dict[str, str] | None = None, output_helps: dict[str, str] | None = None, ) -> tuple[dict[str, Arg], dict[str, Out]]: + """""" - if inputs is None: - inputs = {} - elif isinstance(inputs, list): - inputs_dict = {} - for inpt in inputs: - if not isinstance(inpt, Arg): - inpt = arg_type(inpt, help_string=input_helps.get(inpt, "")) - inputs_dict[inpt.name] = inpt - inputs = inputs_dict - elif isinstance(inputs, dict): - for input_name, arg in list(inputs.items()): - if isinstance(arg, Arg): - if arg.name is None: - arg.name = input_name - elif arg.name != input_name: - raise ValueError( - "Name of the argument must be the same as the key in the " - f"dictionary. The argument name is {arg.name} and the key " - f"is {input_name}" - ) - else: - arg.name = input_name - if not arg.help_string: - arg.help_string = input_helps.get(input_name, "") - else: - inputs[input_name] = arg_type( - type=arg, - name=input_name, - help_string=input_helps.get(input_name, ""), + for input_name, arg in list(inputs.items()): + if isinstance(arg, Arg): + if arg.name is None: + arg.name = input_name + elif arg.name != input_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. The argument name is {arg.name} and the key " + f"is {input_name}" ) - - if outputs is None: - outputs = {} - elif isinstance(outputs, list): - outputs_dict = {} - for out in outputs: - if not isinstance(out, Out): - out = out_type(out, type=ty.Any, help_string=output_helps.get(out, "")) - outputs_dict[out.name] = out - outputs = outputs_dict - elif isinstance(outputs, dict): - for output_name, out in list(outputs.items()): - if isinstance(out, Out): - if out.name is None: - out.name = output_name - elif out.name != output_name: - raise ValueError( - "Name of the argument must be the same as the key in the " - f"dictionary. The argument name is {out.name} and the key " - f"is {output_name}" - ) - else: - out.name = output_name - if not out.help_string: - out.help_string = output_helps.get(output_name, "") else: - outputs[output_name] = out_type( - type=out, - name=output_name, - help_string=output_helps.get(output_name, ""), + arg.name = input_name + if not arg.help_string: + arg.help_string = input_helps.get(input_name, "") + else: + inputs[input_name] = arg_type( + type=arg, + name=input_name, + help_string=input_helps.get(input_name, ""), + ) + + for output_name, out in list(outputs.items()): + if isinstance(out, Out): + if out.name is None: + out.name = output_name + elif out.name != output_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. 
The argument name is {out.name} and the key " + f"is {output_name}" ) + else: + out.name = output_name + if not out.help_string: + out.help_string = output_helps.get(output_name, "") + else: + outputs[output_name] = out_type( + type=out, + name=output_name, + help_string=output_helps.get(output_name, ""), + ) return inputs, outputs @@ -465,7 +445,7 @@ def allowed_values_validator(_, attribute, value): ) -def extract_inputs_and_outputs_from_function( +def extract_function_inputs_and_outputs( function: ty.Callable, arg_type: type[Arg], inputs: list[str | Arg] | dict[str, Arg | type] | None = None, @@ -473,6 +453,14 @@ def extract_inputs_and_outputs_from_function( ) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: """Extract input output types and output names from the function source if they aren't explicitly""" + if undefined_symbols := get_undefined_symbols( + function, exclude_signature_type_hints=True + ): + raise ValueError( + f"The following symbols are not defined within the scope of the function " + f"{function!r}, {undefined_symbols}. Ensure that all imports are " + "defined within the function scope so it is portable" + ) sig = inspect.signature(function) type_hints = ty.get_type_hints(function) input_types = {} diff --git a/pydra/design/python.py b/pydra/design/python.py index b4c61197d2..fa540cae42 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -5,11 +5,11 @@ from .base import ( Arg, Out, - collate_fields, + collate_with_helps, make_task_spec, TaskSpec, parse_doc_string, - extract_inputs_and_outputs_from_function, + extract_function_inputs_and_outputs, check_explicit_fields_are_none, get_fields_from_class, ) @@ -120,12 +120,12 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: klass = None function = wrapped input_helps, output_helps = parse_doc_string(function.__doc__) - inferred_inputs, inferred_outputs = ( - extract_inputs_and_outputs_from_function(function, arg, inputs, outputs) + inferred_inputs, inferred_outputs = extract_function_inputs_and_outputs( + function, arg, inputs, outputs ) name = function.__name__ - parsed_inputs, parsed_outputs = collate_fields( + parsed_inputs, parsed_outputs = collate_with_helps( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 040599139c..fb0777f991 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -16,7 +16,7 @@ Out, check_explicit_fields_are_none, get_fields_from_class, - collate_fields, + collate_with_helps, TaskSpec, make_task_spec, EMPTY, @@ -258,7 +258,7 @@ def make( outputs=outputs, ) - parsed_inputs, parsed_outputs = collate_fields( + parsed_inputs, parsed_outputs = collate_with_helps( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 204701911c..ce9946f1b2 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -6,11 +6,11 @@ from .base import ( Arg, Out, - collate_fields, + collate_with_helps, make_task_spec, TaskSpec, parse_doc_string, - extract_inputs_and_outputs_from_function, + extract_function_inputs_and_outputs, check_explicit_fields_are_none, get_fields_from_class, ) @@ -127,14 +127,12 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: klass = None constructor = wrapped input_helps, output_helps = parse_doc_string(constructor.__doc__) - inferred_inputs, inferred_outputs = ( - extract_inputs_and_outputs_from_function( - constructor, arg, inputs, outputs - ) + inferred_inputs, inferred_outputs = 
extract_function_inputs_and_outputs( + constructor, arg, inputs, outputs ) name = constructor.__name__ - parsed_inputs, parsed_outputs = collate_fields( + parsed_inputs, parsed_outputs = collate_with_helps( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 45b6a5c3ba..8f9866da1a 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -1,6 +1,9 @@ from pathlib import Path import re +import ast +import inspect import platformdirs +import builtins from pydra._version import __version__ user_cache_dir = Path( @@ -43,3 +46,85 @@ def exc_info_matches(exc_info, match, regex=False): return re.match(".*" + match, msg) else: return match in msg + + +def get_undefined_symbols(func, exclude_signature_type_hints: bool = False): + """ + Check the source code of a function and detect any symbols that aren't defined in its scope. + + Parameters + ---------- + func : callable + The function to analyze. + + Returns + ------- + set + A set of undefined symbols. + """ + # Get the source code of the function + source = inspect.getsource(func) + + # Parse the source code into an AST + tree = ast.parse(source) + + # Define a visitor class to traverse the AST + class SymbolVisitor(ast.NodeVisitor): + + def __init__(self): + # Initialize sets to track defined and used symbols + self.defined_symbols = set() + self.used_symbols = set() + + def visit_FunctionDef(self, node): + # Add function arguments to defined symbols + for arg in node.args.args: + self.defined_symbols.add(arg.arg) + if exclude_signature_type_hints: + # Exclude type hints from the defined symbols + type_hints_visitor = SymbolVisitor() + if node.returns: + type_hints_visitor.visit(node.returns) + for arg in node.args.args: + if arg.annotation: + type_hints_visitor.visit(arg.annotation) + type_hint_symbols = type_hints_visitor.used_symbols - self.used_symbols + self.generic_visit(node) + if exclude_signature_type_hints: + # Remove type hints from the used symbols + self.used_symbols -= type_hint_symbols + + def visit_Assign(self, node): + # Add assigned variables to defined symbols + for target in node.targets: + if isinstance(target, ast.Name): + self.defined_symbols.add(target.id) + self.generic_visit(node) + + def visit_Name(self, node): + # Add all variable names to used symbols + if isinstance(node.ctx, ast.Load): + self.used_symbols.add(node.id) + self.generic_visit(node) + + @property + def undefined_symbols(self): + return self.used_symbols - self.defined_symbols - get_builtin_type_names() + + # Create a visitor instance and visit the AST + visitor = SymbolVisitor() + visitor.visit(tree) + + return visitor.undefined_symbols + + +def get_builtin_type_names(): + """ + Get a list of built-in object type names in Python. + + Returns + ------- + set + A set of built-in object type names. 
+ """ + return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) From 5aa9fbf404e2b83e03f60118086825d760f2ba0e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 10:09:59 +1100 Subject: [PATCH 035/342] enhanced get_undefined_symbols to skip decorator and signature type hints --- pydra/design/base.py | 133 ++++++++++++++++++++++--------------------- pydra/utils/misc.py | 15 ++++- 2 files changed, 82 insertions(+), 66 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 811cc52359..a7b2786c74 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -52,7 +52,7 @@ def is_type(_, __, val: ty.Any) -> bool: @attrs.define(kw_only=True) class Field: - """Base class for input and output fields to Pydra tasks + """Base class for input and output fields to task specifications Parameters ---------- @@ -83,7 +83,7 @@ class Field: @attrs.define(kw_only=True) class Arg(Field): - """Base class for input fields to Pydra tasks + """Base class for input fields of task specifications Parameters ---------- @@ -126,7 +126,7 @@ class Arg(Field): @attrs.define(kw_only=True) class Out(Field): - """Base class for input and output fields to Pydra tasks + """Base class for output fields of task specifications Parameters ---------- @@ -152,6 +152,7 @@ class Out(Field): class TaskSpec(ty.Generic[OutputType]): + """Base class for all task specifications""" Task: ty.Type[Task] @@ -183,62 +184,6 @@ def __call__( return task(**kwargs) -def collate_with_helps( - arg_type: type[Arg], - out_type: type[Out], - doc_string: str | None = None, - inputs: dict[str, Arg | type] | None = None, - outputs: dict[str, Out | type] | None = None, - input_helps: dict[str, str] | None = None, - output_helps: dict[str, str] | None = None, -) -> tuple[dict[str, Arg], dict[str, Out]]: - """""" - - for input_name, arg in list(inputs.items()): - if isinstance(arg, Arg): - if arg.name is None: - arg.name = input_name - elif arg.name != input_name: - raise ValueError( - "Name of the argument must be the same as the key in the " - f"dictionary. The argument name is {arg.name} and the key " - f"is {input_name}" - ) - else: - arg.name = input_name - if not arg.help_string: - arg.help_string = input_helps.get(input_name, "") - else: - inputs[input_name] = arg_type( - type=arg, - name=input_name, - help_string=input_helps.get(input_name, ""), - ) - - for output_name, out in list(outputs.items()): - if isinstance(out, Out): - if out.name is None: - out.name = output_name - elif out.name != output_name: - raise ValueError( - "Name of the argument must be the same as the key in the " - f"dictionary. 
The argument name is {out.name} and the key " - f"is {output_name}" - ) - else: - out.name = output_name - if not out.help_string: - out.help_string = output_helps.get(output_name, "") - else: - outputs[output_name] = out_type( - type=out, - name=output_name, - help_string=output_helps.get(output_name, ""), - ) - - return inputs, outputs - - def get_fields_from_class( klass: type, arg_type: type[Arg], @@ -328,7 +273,7 @@ def make_task_spec( tuple(outputs_bases), { o.name: attrs.field( - converter=get_converter(o, f"{name}.Outputs"), + converter=make_converter(o, f"{name}.Outputs"), metadata={PYDRA_ATTR_METADATA: o}, default=_get_default(o), ) @@ -382,8 +327,8 @@ def make_task_spec( arg.name, attrs.field( default=_get_default(arg), - converter=get_converter(arg, klass.__name__, field_type), - validator=get_validator(arg, klass.__name__), + converter=make_converter(arg, klass.__name__, field_type), + validator=make_validator(arg, klass.__name__), metadata={PYDRA_ATTR_METADATA: arg}, on_setattr=attrs.setters.convert, ), @@ -397,7 +342,65 @@ def make_task_spec( return attrs_klass -def get_converter(field: Field, interface_name: str, field_type: ty.Type | None = None): +def collate_with_helps( + arg_type: type[Arg], + out_type: type[Out], + doc_string: str | None = None, + inputs: dict[str, Arg | type] | None = None, + outputs: dict[str, Out | type] | None = None, + input_helps: dict[str, str] | None = None, + output_helps: dict[str, str] | None = None, +) -> tuple[dict[str, Arg], dict[str, Out]]: + """""" + + for input_name, arg in list(inputs.items()): + if isinstance(arg, Arg): + if arg.name is None: + arg.name = input_name + elif arg.name != input_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. The argument name is {arg.name} and the key " + f"is {input_name}" + ) + else: + arg.name = input_name + if not arg.help_string: + arg.help_string = input_helps.get(input_name, "") + else: + inputs[input_name] = arg_type( + type=arg, + name=input_name, + help_string=input_helps.get(input_name, ""), + ) + + for output_name, out in list(outputs.items()): + if isinstance(out, Out): + if out.name is None: + out.name = output_name + elif out.name != output_name: + raise ValueError( + "Name of the argument must be the same as the key in the " + f"dictionary. 
The argument name is {out.name} and the key " + f"is {output_name}" + ) + else: + out.name = output_name + if not out.help_string: + out.help_string = output_helps.get(output_name, "") + else: + outputs[output_name] = out_type( + type=out, + name=output_name, + help_string=output_helps.get(output_name, ""), + ) + + return inputs, outputs + + +def make_converter( + field: Field, interface_name: str, field_type: ty.Type | None = None +): if field_type is None: field_type = field.type checker_label = f"'{field.name}' field of {interface_name} interface" @@ -419,7 +422,7 @@ def get_converter(field: Field, interface_name: str, field_type: ty.Type | None return converter -def get_validator(field: Field, interface_name: str): +def make_validator(field: Field, interface_name: str): validators = [] if field.allowed_values: validators.append(allowed_values_validator) @@ -454,7 +457,7 @@ def extract_function_inputs_and_outputs( """Extract input output types and output names from the function source if they aren't explicitly""" if undefined_symbols := get_undefined_symbols( - function, exclude_signature_type_hints=True + function, exclude_signature_type_hints=True, ignore_decorator=True ): raise ValueError( f"The following symbols are not defined within the scope of the function " diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 8f9866da1a..c25c1c9bb8 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -48,7 +48,9 @@ def exc_info_matches(exc_info, match, regex=False): return match in msg -def get_undefined_symbols(func, exclude_signature_type_hints: bool = False): +def get_undefined_symbols( + func, exclude_signature_type_hints: bool = False, ignore_decorator: bool = False +): """ Check the source code of a function and detect any symbols that aren't defined in its scope. @@ -65,6 +67,17 @@ def get_undefined_symbols(func, exclude_signature_type_hints: bool = False): # Get the source code of the function source = inspect.getsource(func) + # De-indent the source code if required + indent = re.match(r"^\s*", source).group() + source = ("\n" + source).replace("\n" + indent, "\n") + + if ignore_decorator: + # Remove the decorator from the source code, i.e. everything before the first + # unindented 'def ' keyword. 
+ source = re.match( + r"(.*\n)(def .*)", "\n" + source, flags=re.MULTILINE | re.DOTALL + ).group(2) + # Parse the source code into an AST tree = ast.parse(source) From e4da14990cbd1d11e39e5ee44ea905a093b52225 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 13:37:50 +1100 Subject: [PATCH 036/342] debugged workflow construction tests --- pydra/design/base.py | 49 ++++--- pydra/design/tests/test_workflow.py | 39 ++++++ pydra/design/workflow.py | 58 +------- pydra/engine/__init__.py | 2 - pydra/engine/core.py | 4 +- pydra/engine/workflow.py | 204 ++++++++++++++++++++++++++++ 6 files changed, 277 insertions(+), 79 deletions(-) create mode 100644 pydra/design/tests/test_workflow.py create mode 100644 pydra/engine/workflow.py diff --git a/pydra/design/base.py b/pydra/design/base.py index a7b2786c74..ef3c7b0b9d 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -9,7 +9,8 @@ from attrs.converters import default_if_none from fileformats.generic import File from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union -from pydra.utils.misc import get_undefined_symbols + +# from pydra.utils.misc import get_undefined_symbols from pydra.engine.helpers import from_list_if_single, ensure_list from pydra.engine.specs import ( LazyField, @@ -247,16 +248,6 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: return inputs, outputs -def _get_default(field: Field) -> ty.Any: - if not hasattr(field, "default"): - return attrs.NOTHING - if field.default is not EMPTY: - return field.default - if is_optional(field.type): - return None - return attrs.NOTHING - - def make_task_spec( task_type: type[Task], inputs: dict[str, Arg], @@ -275,7 +266,7 @@ def make_task_spec( o.name: attrs.field( converter=make_converter(o, f"{name}.Outputs"), metadata={PYDRA_ATTR_METADATA: o}, - default=_get_default(o), + **_get_default(o), ) for o in outputs.values() }, @@ -326,11 +317,11 @@ def make_task_spec( klass, arg.name, attrs.field( - default=_get_default(arg), converter=make_converter(arg, klass.__name__, field_type), validator=make_validator(arg, klass.__name__), metadata={PYDRA_ATTR_METADATA: arg}, on_setattr=attrs.setters.convert, + **_get_default(arg), ), ) klass.__annotations__[arg.name] = field_type @@ -351,7 +342,7 @@ def collate_with_helps( input_helps: dict[str, str] | None = None, output_helps: dict[str, str] | None = None, ) -> tuple[dict[str, Arg], dict[str, Out]]: - """""" + """Assign help strings to the appropriate inputs and outputs""" for input_name, arg in list(inputs.items()): if isinstance(arg, Arg): @@ -456,14 +447,14 @@ def extract_function_inputs_and_outputs( ) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: """Extract input output types and output names from the function source if they aren't explicitly""" - if undefined_symbols := get_undefined_symbols( - function, exclude_signature_type_hints=True, ignore_decorator=True - ): - raise ValueError( - f"The following symbols are not defined within the scope of the function " - f"{function!r}, {undefined_symbols}. Ensure that all imports are " - "defined within the function scope so it is portable" - ) + # if undefined_symbols := get_undefined_symbols( + # function, exclude_signature_type_hints=True, ignore_decorator=True + # ): + # raise ValueError( + # f"The following symbols are not defined within the scope of the function " + # f"{function!r}, {undefined_symbols}. 
Ensure that all imports are " + # "defined within the function scope so it is portable" + # ) sig = inspect.signature(function) type_hints = ty.get_type_hints(function) input_types = {} @@ -661,6 +652,20 @@ def check_explicit_fields_are_none(klass, inputs, outputs): ) +def _get_default(field: Field) -> dict[str, ty.Any]: + if not hasattr(field, "default"): + return {"factory": nothing_factory} + if field.default is not EMPTY: + return {"default": field.default} + if is_optional(field.type): + return {"default": None} + return {"factory": nothing_factory} + + +def nothing_factory(): + return attrs.NOTHING + + white_space_re = re.compile(r"\s+") PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py new file mode 100644 index 0000000000..d180569d0e --- /dev/null +++ b/pydra/design/tests/test_workflow.py @@ -0,0 +1,39 @@ +from pydra.engine.workflow import Workflow +from pydra.engine.specs import LazyField +import typing as ty +from pydra.design import shell, python, workflow, list_fields + + +def test_workflow(): + + @workflow.define + def MyTestWorkflow(a: int, b: float) -> float: + + @python.define + def Add(a, b): + return a + b + + @python.define + def Mul(a, b): + return a * b + + add = workflow.add(Add(a=a, b=b)) + mul = workflow.add(Mul(a=add.out, b=b)) + return mul.out + + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a", type=int), + workflow.arg(name="b", type=float), + workflow.arg( + name="constructor", type=ty.Callable, default=MyTestWorkflow().constructor + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out", type=float), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out == LazyField(name="Mul", field="out", type=ty.Any) + assert list(wf.node_names) == ["Add", "Mul"] diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index ce9946f1b2..0d97a58658 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -2,7 +2,7 @@ import inspect import attrs from pydra.engine.task import FunctionTask -from pydra.engine.core import Workflow, AuditFlag +from pydra.engine.workflow import Workflow from .base import ( Arg, Out, @@ -16,7 +16,7 @@ ) -__all__ = ["arg", "out", "define", "this", "add", "Node", "WorkflowSpec"] +__all__ = ["define", "add", "this", "arg", "out"] @attrs.define @@ -169,59 +169,11 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: OutputType = ty.TypeVar("OutputType") -class WorkflowSpec(TaskSpec[OutputType]): - - under_construction: Workflow = None - - def __construct__( - self, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir: ty.Any | None = None, - cache_locations: ty.Any | None = None, - cont_dim: ty.Any | None = None, - messenger_args: ty.Any | None = None, - messengers: ty.Any | None = None, - rerun: bool = False, - propagate_rerun: bool = True, - ) -> OutputType: - wf = self.under_construction = Workflow( - name=type(self).__name__, - inputs=self, - audit_flags=audit_flags, - cache_dir=cache_dir, - cache_locations=cache_locations, - cont_dim=cont_dim, - messenger_args=messenger_args, - messengers=messengers, - rerun=rerun, - propagate_rerun=propagate_rerun, - ) - - try: - output_fields = self.construct(**attrs.asdict(self)) - finally: - self.under_construction = None - - wf.outputs = self.Outputs( - **dict( - zip( - (f.name for f in attrs.fields(self.Outputs)), - output_fields, - ) - ) - ) - return 
wf - - def this() -> Workflow: - return WorkflowSpec.under_construction + """Get the workflow currently being constructed.""" + return Workflow.under_construction def add(task_spec: TaskSpec[OutputType]) -> OutputType: + """Add a task to the current workflow.""" return this().add(task_spec) - - -@attrs.define -class Node: - task_spec: TaskSpec - splitter: str | list[str] | None = None diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index c532bc7793..6fbd7a0063 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -1,14 +1,12 @@ """The core of the workflow engine.""" from .submitter import Submitter -from .core import Workflow import __main__ import logging from ._version import __version__ __all__ = [ "Submitter", - "Workflow", "logger", "check_latest_version", ] diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c8b0089cbe..febe253423 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -1030,7 +1030,7 @@ def _sanitize_spec( raise ValueError(f'Empty "{spec_name}" spec provided to Workflow {wf_name}.') -class Workflow(Task): +class WorkflowTask(Task): """A composite task with structure of computational graph.""" def __init__( @@ -1534,7 +1534,7 @@ def is_task(obj): def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" - return isinstance(obj, Workflow) + return isinstance(obj, WorkflowTask) def is_lazy(obj): diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py new file mode 100644 index 0000000000..6ee790699a --- /dev/null +++ b/pydra/engine/workflow.py @@ -0,0 +1,204 @@ +import typing as ty +from copy import copy +from operator import itemgetter +from typing_extensions import Self +import attrs +from pydra.design.base import list_fields, TaskSpec +from pydra.engine.specs import LazyField, StateArray +from pydra.utils.hash import hash_function + + +OutputType = ty.TypeVar("OutputType") + + +@attrs.define +class Node(ty.Generic[OutputType]): + """A node in a workflow + + Parameters + ---------- + name : str + The name of the node + inputs : TaskSpec + The specification of the node + """ + + name: str + inputs: TaskSpec[OutputType] + _splitter: str | list[str] | tuple[str] | None = None + _combiner: list | str = None + _workflow: "Workflow" = None + + @property + def lzout(self) -> OutputType: + """The output spec of the node populated with lazy fields""" + return self.inputs.Outputs( + **{ + f.name: LazyField(name=self.name, field=f.name, type=f.type) + for f in list_fields(self.inputs.Outputs) + } + ) + + def split(self, splitter=None, /, **inputs) -> None: + """Split the node over the specified inputs + + Parameters + ---------- + splitter : str | list[str] | tuple[str], optional + The input field(s) to split over. If a list then an "outer" product + split is performed over all the fields (all combinations). If a tuple then a + the input values must be the same length and "inner" product split is + performed over the fields (pairs of combinations). If a splitter is not provided + then all the inputs are taken to be an outer product split. 
+ **inputs + The input values to split over + """ + if self._splitter is not None: + raise ValueError(f"Splitter already set to {self._splitter!r}") + self._splitter = splitter or list(inputs) + for name, value in inputs.items(): + setattr(self.inputs, name, StateArray(value)) + + def combine(self, combiner: list | str) -> None: + """Combine the node over the specified inputs + + Parameters + ---------- + combiner : list | str + Either a single field or a list of fields to combine in the node + """ + self._combiner = combiner + + +@attrs.define(auto_attribs=False) +class Workflow(ty.Generic[OutputType]): + """A workflow, constructed from a workflow specification + + Parameters + ---------- + name : str + The name of the workflow + inputs : TaskSpec + The input specification of the workflow + outputs : TaskSpec + The output specification of the workflow + """ + + name: str = attrs.field() + inputs: TaskSpec[OutputType] = attrs.field() + outputs: OutputType = attrs.field() + _nodes: dict[str, Node] = attrs.field(factory=dict) + + @classmethod + def construct( + cls, + spec: TaskSpec[OutputType], + ) -> Self: + """Construct a workflow from a specification, caching the constructed worklow""" + + lazy_inputs = [f for f in list_fields(type(spec)) if f.lazy] + + # Create a cache key by hashing all the non-lazy input values in the spec + # and use this to store the constructed workflow in case it is reused or nested + # and split over within another workflow + lazy_input_names = {f.name for f in lazy_inputs} + non_lazy_vals = tuple( + sorted( + (i for i in attrs.asdict(spec).items() if i[0] not in lazy_input_names), + key=itemgetter(0), + ) + ) + hash_key = hash_function(non_lazy_vals) + if hash_key in cls._constructed: + return cls._constructed[hash_key] + + # Initialise the outputs of the workflow + outputs = spec.Outputs( + **{f.name: attrs.NOTHING for f in attrs.fields(spec.Outputs)} + ) + + # Initialise the lzin fields + lazy_spec = copy(spec) + wf = cls.under_construction = Workflow( + name=type(spec).__name__, + inputs=lazy_spec, + outputs=outputs, + ) + for lzy_inpt in lazy_inputs: + setattr( + lazy_spec, + lzy_inpt, + LazyField( + wf.name, # This shouldn't be the name of the workflow, but this is currently + lzy_inpt.name, + lzy_inpt.type, + ), + ) + + input_values = attrs.asdict(lazy_spec) + constructor = input_values.pop("constructor") + cls._under_construction = wf + try: + # Call the user defined constructor to set the outputs + output_values = constructor(**input_values) + + # Check that the outputs are set correctly, either directly by the constructor + # returned values that can be zipped with the output names + if output_values: + if not isinstance(output_values, (list, tuple)): + output_values = [output_values] + output_fields = list_fields(spec.Outputs) + if len(output_values) != len(output_fields): + raise ValueError( + f"Expected {len(output_fields)} outputs, got " + f"{len(output_values)} ({output_values})" + ) + for outpt, oupt_val in zip(output_fields, output_values): + setattr(outputs, outpt.name, oupt_val) + else: + if unset_outputs := { + a: v for a, v in attrs.asdict(outputs).items() if v is attrs.NOTHING + }: + raise ValueError( + f"Expected outputs {list(unset_outputs)} to be set by the " + f"constructor of {wf!r}" + ) + finally: + cls._under_construction = None + + cls._constructed[hash_key] = wf + + return wf + + def add(self, task_spec: TaskSpec[OutputType], name=None) -> OutputType: + if name is None: + name = type(task_spec).__name__ + if name in self._nodes: + 
raise ValueError(f"Node with name {name!r} already exists in the workflow") + node = Node[OutputType](name=name, inputs=task_spec, workflow=self) + self._nodes[name] = node + return node.lzout + + def __getitem__(self, key: str) -> Node: + return self._nodes[key] + + @property + def nodes(self) -> ty.Iterable[Node]: + return self._nodes.values() + + @property + def node_names(self) -> list[str]: + return list(self._nodes) + + @property + @classmethod + def under_construction(cls) -> "Workflow[ty.Any]": + if cls._under_construction is None: + raise ValueError( + "pydra.design.workflow.this() can only be called from within a workflow " + "constructor function" + ) + return cls._under_construction + + _under_construction: "Workflow[ty.Any]" = None + _constructed: dict[int, "Workflow[ty.Any]"] = {} From acde20681a67a04ddcbc349d90151890206b2ed4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 16:00:54 +1100 Subject: [PATCH 037/342] added check for NOTHING values and auto added to ext --- pydra/design/base.py | 8 ++++++++ pydra/design/shell.py | 18 +++++++----------- pydra/design/tests/test_shell.py | 2 +- pydra/engine/workflow.py | 28 +++++++++++++++++++++------- 4 files changed, 37 insertions(+), 19 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index ef3c7b0b9d..62870bfa27 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -170,6 +170,7 @@ def __call__( rerun=False, **kwargs, ): + self._check_for_unset_values() task = self.Task( self, name=name, @@ -184,6 +185,13 @@ def __call__( ) return task(**kwargs) + def _check_for_unset_values(self): + if unset := [k for k, v in attrs.asdict(self).items() if v is attrs.NOTHING]: + raise ValueError( + f"The following values in the {self!r} interface need to be set before it " + f"can be executed: {unset}" + ) + def get_fields_from_class( klass: type, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index fb0777f991..c39cc225dc 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -8,7 +8,7 @@ from copy import copy import attrs import builtins -from fileformats.core import from_mime +from fileformats.core import from_mime, FileSet from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError from .base import ( @@ -466,16 +466,12 @@ def from_type_str(type_str) -> type: type_ |= None # Make the arguments optional kwds = {"type": type_} # If name contains a '.', treat it as a file template and strip it from the name - if "." in name: - if field_type is not outarg: - raise ValueError( - f"File template fields (i.e. with '.' 
in their names) can only " - f"be used with file types, not {type_} and {field_type}" - ) - kwds["path_template"] = name - name = name.split(".")[0] - elif field_type is outarg: - kwds["path_template"] = name + if field_type is outarg: + kwds["path_template"] = ( + name + type_.ext + if issubclass(type_, FileSet) and type_.ext + else name + ) if ty.get_origin(type_) is MultiInputObj: kwds["sep"] = " " if option is None: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 4dedb90e23..cf9ea7db8a 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -36,7 +36,7 @@ def test_interface_template(): def test_interface_template_w_types_and_path_template_ext(): SampleInterface = shell.define( - "trim-png " + "trim-png " ) assert issubclass(SampleInterface, TaskSpec) diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py index 6ee790699a..16060ce731 100644 --- a/pydra/engine/workflow.py +++ b/pydra/engine/workflow.py @@ -1,4 +1,5 @@ import typing as ty +import enum from copy import copy from operator import itemgetter from typing_extensions import Self @@ -129,7 +130,7 @@ def construct( lazy_spec, lzy_inpt, LazyField( - wf.name, # This shouldn't be the name of the workflow, but this is currently + WORKFLOW_LZIN, lzy_inpt.name, lzy_inpt.type, ), @@ -141,9 +142,11 @@ def construct( try: # Call the user defined constructor to set the outputs output_values = constructor(**input_values) - + # Check to see whether any mandatory inputs are not set + for node in wf.nodes: + node.inputs._check_for_unset_values() # Check that the outputs are set correctly, either directly by the constructor - # returned values that can be zipped with the output names + # or via returned values that can be zipped with the output names if output_values: if not isinstance(output_values, (list, tuple)): output_values = [output_values] @@ -156,11 +159,11 @@ def construct( for outpt, oupt_val in zip(output_fields, output_values): setattr(outputs, outpt.name, oupt_val) else: - if unset_outputs := { - a: v for a, v in attrs.asdict(outputs).items() if v is attrs.NOTHING - }: + if unset_outputs := [ + a for a, v in attrs.asdict(outputs).items() if v is attrs.NOTHING + ]: raise ValueError( - f"Expected outputs {list(unset_outputs)} to be set by the " + f"Expected outputs {unset_outputs} to be set by the " f"constructor of {wf!r}" ) finally: @@ -202,3 +205,14 @@ def under_construction(cls) -> "Workflow[ty.Any]": _under_construction: "Workflow[ty.Any]" = None _constructed: dict[int, "Workflow[ty.Any]"] = {} + + +class _WorkflowLzin(enum.Enum): + + WORKFLOW_LZIN = enum.auto() + + def __repr__(self): + return "WORKFLOW_LZIN" + + +WORKFLOW_LZIN = _WorkflowLzin.WORKFLOW_LZIN From 07cf5a8d7d80c24c08774143932bb83134750d44 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 18:27:44 +1100 Subject: [PATCH 038/342] added new workflow tests --- pydra/design/tests/test_workflow.py | 106 +++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 3 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index d180569d0e..1c8566aff6 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -7,7 +7,7 @@ def test_workflow(): @workflow.define - def MyTestWorkflow(a: int, b: float) -> float: + def MyTestWorkflow(a, b): @python.define def Add(a, b): @@ -21,6 +21,104 @@ def Mul(a, b): mul = workflow.add(Mul(a=add.out, b=b)) return mul.out + constructor = MyTestWorkflow().constructor + 
assert constructor.__name__ == "MyTestWorkflow" + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a"), + workflow.arg(name="b"), + workflow.arg(name="constructor", type=ty.Callable, default=constructor), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out"), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out == LazyField(name="Mul", field="out", type=ty.Any) + assert list(wf.node_names) == ["Add", "Mul"] + + +def test_workflow_alt_syntax(): + + @workflow.define(outputs=["out1", "out2"]) + def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: + """A test workflow demonstration a few alternative ways to set and connect nodes + + Args: + a: An integer input + b: A float input + + Returns: + out1: The first output + out2: The second output + """ + + @python.define(inputs={"x": float}, outputs={"out": float}) + def Add(x, y): + return x + y + + def Mul(x, y): + return x * y + + @python.define(outputs=["divided"]) + def Divide(x, y): + return x / y + + wf = workflow.this() + + add = wf.add(Add(x=a, y=b), name="addition") + mul = wf.add(python.define(Mul, outputs={"out": float})(x=add.out, y=b)) + divide = wf.add(Divide(x=wf["addition"].lzout.out, y=mul.out), name="division") + + # Alter one of the inputs to a node after it has been initialised + wf["Mul"].inputs.y *= 2 + + return mul.out, divide.divided + + assert list_fields(MyTestWorkflow) == [ + workflow.arg(name="a", type=int, help_string="An integer input"), + workflow.arg(name="b", type=float, help_string="A float input"), + workflow.arg( + name="constructor", type=ty.Callable, default=MyTestWorkflow().constructor + ), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out1", type=float, help_string="The first output"), + workflow.out(name="out2", type=float, help_string="The second output"), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out1 == LazyField(name="Mul", field="out", type=float) + assert wf.outputs.out2 == LazyField(name="division", field="divided", type=ty.Any) + assert list(wf.node_names) == ["addition", "Mul", "division"] + + +def test_workflow_set_outputs_directly(): + + @workflow.define(outputs={"out1": float, "out2": float}) + def MyTestWorkflow(a: int, b: float): + + @python.define + def Add(a, b): + return a + b + + @python.define + def Mul(a, b): + return a * b + + wf = workflow.this() + + add = wf.add(Add(a=a, b=b)) + wf.add(Mul(a=add.out, b=b)) + + wf.outputs.out2 = add.out # Using the returned lzout outputs + wf.outputs.out1 = wf["Mul"].lzout.out # accessing the lzout outputs via getitem + + # no return required when the outputs are set directly + assert list_fields(MyTestWorkflow) == [ workflow.arg(name="a", type=int), workflow.arg(name="b", type=float), @@ -29,11 +127,13 @@ def Mul(a, b): ), ] assert list_fields(MyTestWorkflow.Outputs) == [ - workflow.out(name="out", type=float), + workflow.out(name="out1", type=float), + workflow.out(name="out2", type=float), ] workflow_spec = MyTestWorkflow(a=1, b=2.0) wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out1 == LazyField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out2 == LazyField(name="Add", field="out", 
type=ty.Any) assert list(wf.node_names) == ["Add", "Mul"] From 9f7a42e74955e11afa4c9a5a561f2dd7deaf8983 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 18:28:08 +1100 Subject: [PATCH 039/342] added check for unrecognised inputs --- pydra/design/base.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 62870bfa27..f86b997490 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -477,6 +477,11 @@ def extract_function_inputs_and_outputs( f"Input names ({inputs}) should not be provided when " "wrapping/decorating a function as " ) + if unrecognised := set(inputs) - set(input_types): + raise ValueError( + f"Unrecognised input names ({unrecognised}) not present in the signature " + f"of the function {function!r}" + ) for inpt_name, type_ in input_types.items(): try: inpt = inputs[inpt_name] @@ -516,12 +521,12 @@ def extract_function_inputs_and_outputs( f"return value {return_type}" ) return_types = ty.get_args(return_type) - if len(return_types) != len(outputs): - raise ValueError( - f"Length of the outputs ({outputs}) does not match that " - f"of the return types ({return_types})" - ) - output_types = dict(zip(outputs, return_types)) + if len(return_types) != len(outputs): + raise ValueError( + f"Length of the outputs ({outputs}) does not match that " + f"of the return types ({return_types})" + ) + output_types = dict(zip(outputs, return_types)) if isinstance(outputs, dict): for output_name, output in outputs.items(): if isinstance(output, Out) and output.type is ty.Any: From 2d4f030000f65ceb98189a21e06880d5896f88f2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 18:28:28 +1100 Subject: [PATCH 040/342] added custom hash for function types based on function source --- pydra/utils/hash.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 3ba3e97b44..de2b94a9f0 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -3,6 +3,7 @@ import sys import os import struct +import inspect from datetime import datetime import typing as ty import types @@ -519,6 +520,13 @@ def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: + yield b"function:(" + yield hash_single(inspect.getsource(obj), cache) + yield b")" + + def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]: """Serialize the contents of a mapping From 35c952d1cd4002149b5675da4131bb43e4e7572a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 18 Nov 2024 20:43:51 +1100 Subject: [PATCH 041/342] added shell workflow and debugged shell task definitions --- pydra/design/base.py | 7 ++-- pydra/design/shell.py | 18 ++++++---- pydra/design/tests/test_workflow.py | 53 +++++++++++++++++++++++++++++ pydra/design/workflow.py | 4 +-- 4 files changed, 70 insertions(+), 12 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index f86b997490..35db82ca90 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -503,13 +503,12 @@ def extract_function_inputs_and_outputs( src = inspect.getsource(function).strip() return_lines = re.findall(r"\n\s+return .*$", src) if len(return_lines) == 1 and src.endswith(return_lines[0]): + return_line = return_lines[0].split("#")[0] implicit_outputs = [ o.strip() - for o in re.match(r"\s*return\s+(.*)", return_lines[0]) - .group(1) - .split(",") + for o in 
re.match(r"\s*return\s+(.*)", return_line).group(1).split(",") ] - if len(implicit_outputs) > 1 and all( + if len(implicit_outputs) and all( re.match(r"^\w+$", o) for o in implicit_outputs ): outputs = implicit_outputs diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c39cc225dc..24ce54a7ec 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -8,7 +8,7 @@ from copy import copy import attrs import builtins -from fileformats.core import from_mime, FileSet +from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError from .base import ( @@ -21,6 +21,7 @@ make_task_spec, EMPTY, ) +from pydra.utils.typing import is_fileset_or_union from pydra.engine.specs import MultiInputObj from pydra.engine.task import ShellCommandTask @@ -467,11 +468,15 @@ def from_type_str(type_str) -> type: kwds = {"type": type_} # If name contains a '.', treat it as a file template and strip it from the name if field_type is outarg: - kwds["path_template"] = ( - name + type_.ext - if issubclass(type_, FileSet) and type_.ext - else name - ) + path_template = name + if is_fileset_or_union(type_): + if ty.get_origin(type_): + ext_type = next(a for a in ty.get_args(type_) if a is not None) + else: + ext_type = type_ + if ext_type.ext is not None: + path_template = name + ext_type.ext + kwds["path_template"] = path_template if ty.get_origin(type_) is MultiInputObj: kwds["sep"] = " " if option is None: @@ -479,6 +484,7 @@ def from_type_str(type_str) -> type: else: kwds["argstr"] = option add_arg(name, field_type, kwds) + option = None elif match := bool_arg_re.match(token): argstr, var = match.groups() add_arg(var, arg, {"type": bool, "argstr": argstr, "default": False}) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 1c8566aff6..0d26dde34f 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -2,6 +2,7 @@ from pydra.engine.specs import LazyField import typing as ty from pydra.design import shell, python, workflow, list_fields +from fileformats import video, image def test_workflow(): @@ -39,6 +40,58 @@ def Mul(a, b): assert list(wf.node_names) == ["Add", "Mul"] +def test_shell_workflow(): + + @workflow.define + def MyTestShellWorkflow(input_video: video.Mp4, watermark: image.Png) -> video.Mp4: + + add_watermark = workflow.add( + shell.define( + "ffmpeg -i -i -filter_complex " + )(in_video=input_video, watermark=watermark, filter="overlay=10:10"), + name="add_watermark", + ) + output_video = workflow.add( + shell.define( + ( + "HandBrakeCLI -i -o " + "--width --height " + ), + # this specifies that this output is required even though it has a flag, + # optional inputs and outputs are of type * | None + inputs={"in_video": video.Mp4}, + outputs={"out_video": video.Mp4}, + )(in_video=add_watermark.out_video, width=1280, height=720), + name="resize", + ).out_video + + return output_video # test implicit detection of output name + + constructor = MyTestShellWorkflow().constructor + assert constructor.__name__ == "MyTestShellWorkflow" + assert list_fields(MyTestShellWorkflow) == [ + workflow.arg(name="input_video", type=video.Mp4), + workflow.arg(name="watermark", type=image.Png), + workflow.arg(name="constructor", type=ty.Callable, default=constructor), + ] + assert list_fields(MyTestShellWorkflow.Outputs) == [ + workflow.out(name="output_video", type=video.Mp4), + ] + input_video = video.Mp4.mock("input.mp4") + watermark = 
image.Png.mock("watermark.png") + workflow_spec = MyTestShellWorkflow( + input_video=input_video, + watermark=watermark, + ) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.input_video == input_video + assert wf.inputs.watermark == watermark + assert wf.outputs.output_video == LazyField( + name="resize", field="out_video", type=video.Mp4 + ) + assert list(wf.node_names) == ["add_watermark", "resize"] + + def test_workflow_alt_syntax(): @workflow.define(outputs=["out1", "out2"]) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 0d97a58658..0e8f9b8240 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -174,6 +174,6 @@ def this() -> Workflow: return Workflow.under_construction -def add(task_spec: TaskSpec[OutputType]) -> OutputType: +def add(task_spec: TaskSpec[OutputType], name: str = None) -> OutputType: """Add a task to the current workflow.""" - return this().add(task_spec) + return this().add(task_spec, name=name) From ac08c185f1c8cfb708ef76b72d51ed4aad48d624 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 19 Nov 2024 13:56:42 +1100 Subject: [PATCH 042/342] added cononical workflow test --- pydra/design/base.py | 10 +- pydra/design/shell.py | 2 +- pydra/design/tests/test_workflow.py | 243 ++++++++++++++++++++------ pydra/design/workflow.py | 4 +- pydra/engine/core.py | 6 +- pydra/engine/specs.py | 1 + pydra/engine/workflow.py | 253 ++++++++++++++++++++++++---- pydra/utils/typing.py | 1 + 8 files changed, 433 insertions(+), 87 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 35db82ca90..66ee51cab9 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -186,10 +186,14 @@ def __call__( return task(**kwargs) def _check_for_unset_values(self): - if unset := [k for k, v in attrs.asdict(self).items() if v is attrs.NOTHING]: + if unset := [ + k + for k, v in attrs.asdict(self, recurse=False).items() + if v is attrs.NOTHING + ]: raise ValueError( - f"The following values in the {self!r} interface need to be set before it " - f"can be executed: {unset}" + f"The following values {unset} in the {self!r} interface need to be set " + "before the workflow can be constructed" ) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 24ce54a7ec..c14a514d84 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -413,7 +413,7 @@ def add_arg(name, field_type, kwds, is_option=False): kwds["type"] = field field = field_type(name=name, **kwds) elif not isinstance(field, field_type): # If field type is outarg not out - field = field_type(**attrs.asdict(field)) + field = field_type(**attrs.asdict(field, recurse=False)) field.name = name type_ = kwds.pop("type", field.type) if field.type is ty.Any: diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 0d26dde34f..c1ea2a4fc7 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,29 +1,37 @@ +from operator import attrgetter +import pytest +import attrs from pydra.engine.workflow import Workflow from pydra.engine.specs import LazyField import typing as ty -from pydra.design import shell, python, workflow, list_fields +from pydra.design import shell, python, workflow, list_fields, TaskSpec from fileformats import video, image def test_workflow(): - @workflow.define - def MyTestWorkflow(a, b): + # NB: We use PascalCase (i.e. 
class names) as it is translated into a class - @python.define - def Add(a, b): - return a + b + @python.define + def Add(a, b): + return a + b - @python.define - def Mul(a, b): - return a * b + @python.define + def Mul(a, b): + return a * b + @workflow.define + def MyTestWorkflow(a, b): add = workflow.add(Add(a=a, b=b)) mul = workflow.add(Mul(a=add.out, b=b)) return mul.out constructor = MyTestWorkflow().constructor assert constructor.__name__ == "MyTestWorkflow" + + # The constructor function is included as a part of the specification so it is + # included in the hash by default and can be overridden if needed. Not 100% sure + # if this is a good idea or not assert list_fields(MyTestWorkflow) == [ workflow.arg(name="a"), workflow.arg(name="b"), @@ -36,29 +44,42 @@ def Mul(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out == LazyField( + name="Mul", field="out", type=ty.Any, type_checked=True + ) + + # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] def test_shell_workflow(): @workflow.define - def MyTestShellWorkflow(input_video: video.Mp4, watermark: image.Png) -> video.Mp4: + def MyTestShellWorkflow( + input_video: video.Mp4, + watermark: image.Png, + watermark_dims: tuple[int, int] = (10, 10), + ) -> video.Mp4: add_watermark = workflow.add( shell.define( - "ffmpeg -i -i -filter_complex " - )(in_video=input_video, watermark=watermark, filter="overlay=10:10"), + "ffmpeg -i -i " + "-filter_complex " + )( + in_video=input_video, + watermark=watermark, + filter="overlay={}:{}".format(*watermark_dims), + ), name="add_watermark", ) output_video = workflow.add( shell.define( - ( - "HandBrakeCLI -i -o " - "--width --height " - ), - # this specifies that this output is required even though it has a flag, - # optional inputs and outputs are of type * | None + "HandBrakeCLI -i -o " + "--width --height ", + # By default any input/output specified with a flag (e.g. -i ) + # is considered optional, i.e. of type `FsObject | None`, and therefore + # won't be used by default. By overriding this with non-optional types, + # the fields are specified as being required. inputs={"in_video": video.Mp4}, outputs={"out_video": video.Mp4}, )(in_video=add_watermark.out_video, width=1280, height=720), @@ -72,6 +93,7 @@ def MyTestShellWorkflow(input_video: video.Mp4, watermark: image.Png) -> video.M assert list_fields(MyTestShellWorkflow) == [ workflow.arg(name="input_video", type=video.Mp4), workflow.arg(name="watermark", type=image.Png), + workflow.arg(name="watermark_dims", type=tuple[int, int], default=(10, 10)), workflow.arg(name="constructor", type=ty.Callable, default=constructor), ] assert list_fields(MyTestShellWorkflow.Outputs) == [ @@ -87,12 +109,86 @@ def MyTestShellWorkflow(input_video: video.Mp4, watermark: image.Png) -> video.M assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark assert wf.outputs.output_video == LazyField( - name="resize", field="out_video", type=video.Mp4 + name="resize", field="out_video", type=video.Mp4, type_checked=True ) assert list(wf.node_names) == ["add_watermark", "resize"] -def test_workflow_alt_syntax(): +def test_workflow_canonical(): + + # NB: We use PascalCase (i.e. 
class names) as it is translated into a class + + @python.define + def Add(a, b): + return a + b + + @python.define + def Mul(a, b): + return a * b + + def a_converter(value): + if value is attrs.NOTHING: + return value + return float(value) + + @workflow.define + class MyTestWorkflow(TaskSpec["MyTestWorkflow.Outputs"]): + + a: int + b: float = workflow.arg( + help_string="A float input", + converter=a_converter, + ) + + @staticmethod + def constructor(a, b): + add = workflow.add(Add(a=a, b=b)) + mul = workflow.add(Mul(a=add.out, b=b)) + return mul.out + + class Outputs: + out: float + + constructor = MyTestWorkflow().constructor + assert constructor.__name__ == "constructor" + + # The constructor function is included as a part of the specification so it is + # included in the hash by default and can be overridden if needed. Not 100% sure + # if this is a good idea or not + assert sorted(list_fields(MyTestWorkflow), key=attrgetter("name")) == [ + workflow.arg(name="a", type=int), + workflow.arg( + name="b", type=float, help_string="A float input", converter=a_converter + ), + workflow.arg(name="constructor", type=ty.Callable, default=constructor), + ] + assert list_fields(MyTestWorkflow.Outputs) == [ + workflow.out(name="out", type=float), + ] + workflow_spec = MyTestWorkflow(a=1, b=2.0) + wf = Workflow.construct(workflow_spec) + assert wf.inputs.a == 1 + assert wf.inputs.b == 2.0 + assert wf.outputs.out == LazyField( + name="Mul", field="out", type=ty.Any, type_checked=True + ) + + # Nodes are named after the specs by default + assert list(wf.node_names) == ["Add", "Mul"] + + +def test_direct_access_of_workflow_object(): + + @python.define(inputs={"x": float}, outputs={"z": float}) + def Add(x, y): + return x + y + + def Mul(x, y): + return x * y + + @python.define(outputs=["divided"]) + def Divide(x, y): + return x / y @workflow.define(outputs=["out1", "out2"]) def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: @@ -107,22 +203,11 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: out2: The second output """ - @python.define(inputs={"x": float}, outputs={"out": float}) - def Add(x, y): - return x + y - - def Mul(x, y): - return x * y - - @python.define(outputs=["divided"]) - def Divide(x, y): - return x / y - wf = workflow.this() add = wf.add(Add(x=a, y=b), name="addition") - mul = wf.add(python.define(Mul, outputs={"out": float})(x=add.out, y=b)) - divide = wf.add(Divide(x=wf["addition"].lzout.out, y=mul.out), name="division") + mul = wf.add(python.define(Mul, outputs={"out": float})(x=add.z, y=b)) + divide = wf.add(Divide(x=wf["addition"].lzout.z, y=mul.out), name="division") # Alter one of the inputs to a node after it has been initialised wf["Mul"].inputs.y *= 2 @@ -144,33 +229,38 @@ def Divide(x, y): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyField(name="Mul", field="out", type=float) - assert wf.outputs.out2 == LazyField(name="division", field="divided", type=ty.Any) + assert wf.outputs.out1 == LazyField( + name="Mul", field="out", type=float, type_checked=True + ) + assert wf.outputs.out2 == LazyField( + name="division", field="divided", type=ty.Any, type_checked=True + ) assert list(wf.node_names) == ["addition", "Mul", "division"] def test_workflow_set_outputs_directly(): - @workflow.define(outputs={"out1": float, "out2": float}) - def MyTestWorkflow(a: int, b: float): + @python.define + def Add(a, b): + return a + b - @python.define - def Add(a, b): - return a + b + 
@python.define + def Mul(a, b): + return a * b - @python.define - def Mul(a, b): - return a * b + @workflow.define(outputs={"out1": float, "out2": float}) + def MyTestWorkflow(a: int, b: float): wf = workflow.this() - add = wf.add(Add(a=a, b=b)) wf.add(Mul(a=add.out, b=b)) + # Set the outputs of the workflow directly instead of returning them them in + # a tuple wf.outputs.out2 = add.out # Using the returned lzout outputs wf.outputs.out1 = wf["Mul"].lzout.out # accessing the lzout outputs via getitem - # no return required when the outputs are set directly + # no return is used when the outputs are set directly assert list_fields(MyTestWorkflow) == [ workflow.arg(name="a", type=int), @@ -187,6 +277,63 @@ def Mul(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyField(name="Mul", field="out", type=ty.Any) - assert wf.outputs.out2 == LazyField(name="Add", field="out", type=ty.Any) + assert wf.outputs.out1 == LazyField( + name="Mul", field="out", type=ty.Any, type_checked=True + ) + assert wf.outputs.out2 == LazyField( + name="Add", field="out", type=ty.Any, type_checked=True + ) assert list(wf.node_names) == ["Add", "Mul"] + + +def test_workflow_split_combine(): + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Sum(x: list[float]) -> float: + return sum(x) + + @workflow.define + def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: + + wf = workflow.this() + mul = wf.add(Mul()) + # We could avoid having to specify the splitter and combiner on a separate + # line by making 'split' and 'combine' reserved keywords for Outputs class attrs + wf["Mul"].split(x=a, y=b).combine("a") + sum = wf.add(Sum(x=mul.out)) + return sum.out + + wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) + assert wf["Mul"]._state.splitter == ["x", "y"] + assert wf["Mul"]._state.combiner == ["x"] + + +def test_workflow_split_after_access_fail(): + """It isn't possible to split/combine a node after one of its outputs has been type + checked as this changes the type of the outputs and renders the type checking + invalid + """ + + @python.define + def Add(x, y): + return x + y + + @python.define + def Mul(x, y): + return x * y + + @workflow.define + def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: + + wf = workflow.this() + add = wf.add(Add()) + mul = wf.add(Mul(x=add.out, y=2.0)) # << Add.out is accessed here + wf["Add"].split(x=a, y=b).combine("x") + return mul.out + + with pytest.raises(RuntimeError, match="Outputs .* have already been accessed"): + Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 0e8f9b8240..506260cbbb 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -1,7 +1,7 @@ import typing as ty import inspect import attrs -from pydra.engine.task import FunctionTask +from pydra.engine.core import WorkflowTask from pydra.engine.workflow import Workflow from .base import ( Arg, @@ -148,7 +148,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: parsed_inputs[inpt_name].lazy = True interface = make_task_spec( - FunctionTask, + WorkflowTask, parsed_inputs, parsed_outputs, name=name, diff --git a/pydra/engine/core.py b/pydra/engine/core.py index febe253423..13123cb02a 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -85,7 +85,7 @@ class Task: def __init__( self, - interface, + spec, name: str | None = None, 
audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, @@ -138,7 +138,7 @@ def __init__( if Task._etelemetry_version_data is None: Task._etelemetry_version_data = check_latest_version() - self.interface = interface + self.interface = spec # raise error if name is same as of attributes if name in dir(self): raise ValueError("Cannot use names of attributes or methods as task name") @@ -269,7 +269,7 @@ def checksum(self): def checksum_states(self, state_index=None): """ Calculate a checksum for the specific state or all of the states of the task. - Replaces lists in the inputs fields with a specific values for states. + Replaces state-arrays in the inputs fields with a specific values for states. Used to recreate names of the task directories, Parameters diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 47a2f38686..57fbb80833 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -817,6 +817,7 @@ class LazyField(ty.Generic[T]): factory=frozenset, converter=frozenset ) cast_from: ty.Optional[ty.Type[ty.Any]] = None + type_checked: bool = False def __bytes_repr__(self, cache): yield type(self).__name__.encode() diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py index 16060ce731..f9058cf152 100644 --- a/pydra/engine/workflow.py +++ b/pydra/engine/workflow.py @@ -1,12 +1,15 @@ import typing as ty import enum -from copy import copy +from copy import copy, deepcopy from operator import itemgetter from typing_extensions import Self import attrs from pydra.design.base import list_fields, TaskSpec from pydra.engine.specs import LazyField, StateArray from pydra.utils.hash import hash_function +from . import helpers_state as hlpst +from .helpers import ensure_list +from . import state OutputType = ty.TypeVar("OutputType") @@ -29,46 +32,230 @@ class Node(ty.Generic[OutputType]): _splitter: str | list[str] | tuple[str] | None = None _combiner: list | str = None _workflow: "Workflow" = None + _lzout: OutputType | None = None + _state: state.State | None = None + _cont_dim: dict[str, int] | None = None @property def lzout(self) -> OutputType: """The output spec of the node populated with lazy fields""" - return self.inputs.Outputs( - **{ - f.name: LazyField(name=self.name, field=f.name, type=f.type) - for f in list_fields(self.inputs.Outputs) - } - ) - - def split(self, splitter=None, /, **inputs) -> None: - """Split the node over the specified inputs + if self._lzout is not None: + return self._lzout + state_array_depth = 0 + for inpt_name, inpt_val in attrs.asdict(self.inputs).items(): + if isinstance(inpt_val, LazyField): + tp = inpt_val.type + depth = 0 + while isinstance(tp, StateArray): + tp = ty.get_args(tp)[0] + depth += 1 + # FIXME: This won't be able to differentiate between state arrays + # from different splits and those from the same split, we might need to + # keep track of that in the LazyField... (am I right about this??) 
+ state_array_depth = max(state_array_depth, depth) + lazy_fields = {} + for field in list_fields(self.inputs.Outputs): + # TODO: need to reimplement the full spliter/combiner logic here + if self._splitter and field.name in self._splitter: + if field.name in self._combiner: + type_ = list[field.type] + else: + type_ = StateArray(field.type) + else: + type_ = field.type + for _ in range(state_array_depth): + type_ = StateArray[type_] + lazy_fields[field.name] = LazyField( + name=self.name, + field=field.name, + type=type_, + ) + outputs = self.inputs.Outputs(**lazy_fields) + # Flag the output lazy fields as being not typed checked (i.e. assigned to another + # node's inputs) yet + for outpt in attrs.asdict(outputs, recurse=False).values(): + outpt.type_checked = False + self._lzout = outputs + return outputs + + def split( + self, + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, + /, + overwrite: bool = False, + cont_dim: ty.Optional[dict] = None, + **inputs, + ): + """ + Run this task parametrically over lists of split inputs. Parameters ---------- - splitter : str | list[str] | tuple[str], optional - The input field(s) to split over. If a list then an "outer" product - split is performed over all the fields (all combinations). If a tuple then a - the input values must be the same length and "inner" product split is - performed over the fields (pairs of combinations). If a splitter is not provided - then all the inputs are taken to be an outer product split. + splitter : str or list[str] or tuple[str] or None + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + overwrite : bool, optional + whether to overwrite an existing split on the node, by default False + cont_dim : dict, optional + Container dimensions for specific inputs, used in the splitter. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. **inputs - The input values to split over + fields to split over, will automatically be wrapped in a StateArray object + and passed to the node inputs + + Returns + ------- + self : TaskBase + a reference to the task """ - if self._splitter is not None: - raise ValueError(f"Splitter already set to {self._splitter!r}") - self._splitter = splitter or list(inputs) - for name, value in inputs.items(): - setattr(self.inputs, name, StateArray(value)) + self._check_if_outputs_have_been_used() + if splitter is None and inputs: + splitter = list(inputs) + elif splitter: + missing = set(hlpst.unwrap_splitter(splitter)) - set(inputs) + missing = [m for m in missing if not m.startswith("_")] + if missing: + raise ValueError( + f"Split is missing values for the following fields {list(missing)}" + ) + splitter = hlpst.add_name_splitter(splitter, self.name) + # if user want to update the splitter, overwrite has to be True + if self._state and not overwrite and self._state.splitter != splitter: + raise Exception( + "splitter has been already set, " + "if you want to overwrite it - use overwrite=True" + ) + if cont_dim: + for key, vel in cont_dim.items(): + self._cont_dim[f"{self.name}.{key}"] = vel + if inputs: + new_inputs = {} + split_inputs = set( + f"{self.name}.{n}" if "." 
not in n else n + for n in hlpst.unwrap_splitter(splitter) + if not n.startswith("_") + ) + for inpt_name, inpt_val in inputs.items(): + new_val: ty.Any + if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore + if isinstance(inpt_val, LazyField): + new_val = inpt_val.split(splitter) + elif isinstance(inpt_val, ty.Iterable) and not isinstance( + inpt_val, (ty.Mapping, str) + ): + new_val = StateArray(inpt_val) + else: + raise TypeError( + f"Could not split {inpt_val} as it is not a sequence type" + ) + else: + new_val = inpt_val + new_inputs[inpt_name] = new_val + # Update the inputs with the new split values + self.inputs = attrs.evolve(self.inputs, **new_inputs) + if not self._state or splitter != self._state.splitter: + self._set_state(splitter) + return self + + def combine( + self, + combiner: ty.Union[ty.List[str], str], + overwrite: bool = False, # **kwargs + ): + """ + Combine inputs parameterized by one or more previous tasks. - def combine(self, combiner: list | str) -> None: - """Combine the node over the specified inputs + Parameters + ---------- + combiner : list[str] or str + the field or list of inputs to be combined (i.e. not left split) after the + task has been run + overwrite : bool + whether to overwrite an existing combiner on the node + **kwargs : dict[str, Any] + values for the task that will be "combined" before they are provided to the + node + + Returns + ------- + self : TaskBase + a reference to the task + """ + if not isinstance(combiner, (str, list)): + raise Exception("combiner has to be a string or a list") + combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) + if ( + self._state + and self._state.combiner + and combiner != self._state.combiner + and not overwrite + ): + raise Exception( + "combiner has been already set, " + "if you want to overwrite it - use overwrite=True" + ) + if not self._state: + self.split(splitter=None) + # a task can have a combiner without a splitter + # if is connected to one with a splitter; + # self.fut_combiner will be used later as a combiner + self._state.fut_combiner = combiner + else: # self.state and not self.state.combiner + self._set_state(splitter=self._state.splitter, combiner=combiner) + return self + + def _set_state(self, splitter, combiner=None): + """ + Set a particular state on this task. Parameters ---------- - combiner : list | str - Either a single field or a list of fields to combine in the node + splitter : str | list[str] | tuple[str] + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + combiner : list[str] | str, optional + the field or list of inputs to be combined (i.e. 
not left split) after the + task has been run """ - self._combiner = combiner + if splitter is not None: + self._state = state.State( + name=self.name, splitter=splitter, combiner=combiner + ) + else: + self._state = None + return self._state + + @property + def cont_dim(self): + # adding inner_cont_dim to the general container_dimension provided by the users + cont_dim_all = deepcopy(self._cont_dim) + for k, v in self._inner_cont_dim.items(): + cont_dim_all[k] = cont_dim_all.get(k, 1) + v + return cont_dim_all + + @cont_dim.setter + def cont_dim(self, cont_dim): + if cont_dim is None: + self._cont_dim = {} + else: + self._cont_dim = cont_dim + + def _check_if_outputs_have_been_used(self): + used = [] + if self._lzout: + for outpt_name, outpt_val in attrs.asdict( + self._lzout, recurse=False + ).items(): + if outpt_val.type_checked: + used.append(outpt_name) + if used: + raise RuntimeError( + f"Outputs {used} of {self} have already been accessed and therefore cannot " + "be split or combined" + ) @attrs.define(auto_attribs=False) @@ -105,7 +292,11 @@ def construct( lazy_input_names = {f.name for f in lazy_inputs} non_lazy_vals = tuple( sorted( - (i for i in attrs.asdict(spec).items() if i[0] not in lazy_input_names), + ( + i + for i in attrs.asdict(spec, recurse=False).items() + if i[0] not in lazy_input_names + ), key=itemgetter(0), ) ) @@ -136,7 +327,7 @@ def construct( ), ) - input_values = attrs.asdict(lazy_spec) + input_values = attrs.asdict(lazy_spec, recurse=False) constructor = input_values.pop("constructor") cls._under_construction = wf try: @@ -160,7 +351,9 @@ def construct( setattr(outputs, outpt.name, oupt_val) else: if unset_outputs := [ - a for a, v in attrs.asdict(outputs).items() if v is attrs.NOTHING + a + for a, v in attrs.asdict(outputs, recurse=False).items() + if v is attrs.NOTHING ]: raise ValueError( f"Expected outputs {unset_outputs} to be set by the " diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 4276cf411a..ba74d02030 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -216,6 +216,7 @@ def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: "coerced to one that is)" ) from e coerced = obj # type: ignore + obj.type_checked = True # Used to check whether the type can be changed elif isinstance(obj, StateArray): coerced = StateArray(self(o) for o in obj) # type: ignore[assignment] else: From a77be6a6d3492c180c3a07ac6ae9db3143d4f554 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 19 Nov 2024 14:04:40 +1100 Subject: [PATCH 043/342] added lazy inputs test --- pydra/design/tests/test_workflow.py | 54 ++++++++++++++++++++++++++++- pydra/engine/workflow.py | 8 ++--- 2 files changed, 57 insertions(+), 5 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index c1ea2a4fc7..077996a57b 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,7 +1,7 @@ from operator import attrgetter import pytest import attrs -from pydra.engine.workflow import Workflow +from pydra.engine.workflow import Workflow, WORKFLOW_LZIN from pydra.engine.specs import LazyField import typing as ty from pydra.design import shell, python, workflow, list_fields, TaskSpec @@ -115,6 +115,7 @@ def MyTestShellWorkflow( def test_workflow_canonical(): + """Test class-based workflow definition""" # NB: We use PascalCase (i.e. 
class names) as it is translated into a class @@ -177,6 +178,57 @@ class Outputs: assert list(wf.node_names) == ["Add", "Mul"] +def test_workflow_lazy(): + + @workflow.define(lazy=["input_video", "watermark"]) + def MyTestShellWorkflow( + input_video: video.Mp4, + watermark: image.Png, + watermark_dims: tuple[int, int] = (10, 10), + ) -> video.Mp4: + + add_watermark = workflow.add( + shell.define( + "ffmpeg -i -i " + "-filter_complex " + )( + in_video=input_video, + watermark=watermark, + filter="overlay={}:{}".format(*watermark_dims), + ), + name="add_watermark", + ) + output_video = workflow.add( + shell.define( + "HandBrakeCLI -i -o " + "--width --height ", + # By default any input/output specified with a flag (e.g. -i ) + # is considered optional, i.e. of type `FsObject | None`, and therefore + # won't be used by default. By overriding this with non-optional types, + # the fields are specified as being required. + inputs={"in_video": video.Mp4}, + outputs={"out_video": video.Mp4}, + )(in_video=add_watermark.out_video, width=1280, height=720), + name="resize", + ).out_video + + return output_video # test implicit detection of output name + + input_video = video.Mp4.mock("input.mp4") + watermark = image.Png.mock("watermark.png") + workflow_spec = MyTestShellWorkflow( + input_video=input_video, + watermark=watermark, + ) + wf = Workflow.construct(workflow_spec) + assert wf["add_watermark"].inputs.in_video == LazyField( + name=WORKFLOW_LZIN, field="input_video", type=video.Mp4, type_checked=True + ) + assert wf["add_watermark"].inputs.watermark == LazyField( + name=WORKFLOW_LZIN, field="watermark", type=image.Png, type_checked=True + ) + + def test_direct_access_of_workflow_object(): @python.define(inputs={"x": float}, outputs={"z": float}) diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py index f9058cf152..adf2193578 100644 --- a/pydra/engine/workflow.py +++ b/pydra/engine/workflow.py @@ -319,11 +319,11 @@ def construct( for lzy_inpt in lazy_inputs: setattr( lazy_spec, - lzy_inpt, + lzy_inpt.name, LazyField( - WORKFLOW_LZIN, - lzy_inpt.name, - lzy_inpt.type, + name=WORKFLOW_LZIN, + field=lzy_inpt.name, + type=lzy_inpt.type, ), ) From 476f7dbb1bb644a0f4a50e9024bb672cc2ab69dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 03:14:49 +0000 Subject: [PATCH 044/342] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydra/mark/shell.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py index 9abdcf61fe..d0cde91337 100644 --- a/pydra/mark/shell.py +++ b/pydra/mark/shell.py @@ -1,4 +1,5 @@ """Decorators and helper functions to create ShellCommandTasks used in Pydra workflows""" + from __future__ import annotations import typing as ty import attrs From cd97c11970b31afa1665151f0750e793fca6ad79 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 3 Dec 2024 21:44:38 +1100 Subject: [PATCH 045/342] Debugged splitting and combining of lazy fields --- pydra/design/base.py | 285 +++++++++++++++++++++++++--- pydra/design/python.py | 8 +- pydra/design/shell.py | 8 +- pydra/design/tests/test_workflow.py | 61 +++--- pydra/design/workflow.py | 8 +- pydra/engine/specs.py | 26 ++- pydra/engine/workflow.py | 131 ++++++++----- pydra/utils/typing.py | 2 - 8 files changed, 392 insertions(+), 137 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 66ee51cab9..64e0ab8510 100644 --- a/pydra/design/base.py 
+++ b/pydra/design/base.py @@ -5,6 +5,7 @@ import enum from pathlib import Path from copy import copy +from typing_extensions import Self import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File @@ -21,16 +22,20 @@ ) from pydra.engine.core import Task, AuditFlag + __all__ = [ "Field", "Arg", "Out", "TaskSpec", - "collate_with_helps", + "OutputsSpec", + "ensure_field_objects", "make_task_spec", "list_fields", ] +RESERVED_OUTPUT_NAMES = ("split", "combine") + class _Empty(enum.Enum): @@ -149,7 +154,73 @@ class Out(Field): pass -OutputType = ty.TypeVar("OutputType") +class OutputsSpec: + """Base class for all output specifications""" + + def split( + self, + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, + /, + overwrite: bool = False, + cont_dim: ty.Optional[dict] = None, + **inputs, + ) -> Self: + """ + Run this task parametrically over lists of split inputs. + + Parameters + ---------- + splitter : str or list[str] or tuple[str] or None + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + overwrite : bool, optional + whether to overwrite an existing split on the node, by default False + cont_dim : dict, optional + Container dimensions for specific inputs, used in the splitter. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. + **inputs + fields to split over, will automatically be wrapped in a StateArray object + and passed to the node inputs + + Returns + ------- + self : TaskBase + a reference to the task + """ + self._node.split(splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs) + return self + + def combine( + self, + combiner: ty.Union[ty.List[str], str], + overwrite: bool = False, # **kwargs + ) -> Self: + """ + Combine inputs parameterized by one or more previous tasks. + + Parameters + ---------- + combiner : list[str] or str + the field or list of inputs to be combined (i.e. 
not left split) after the + task has been run + overwrite : bool + whether to overwrite an existing combiner on the node + **kwargs : dict[str, Any] + values for the task that will be "combined" before they are provided to the + node + + Returns + ------- + self : Self + a reference to the outputs object + """ + self._node.combine(combiner, overwrite=overwrite) + return self + + +OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) class TaskSpec(ty.Generic[OutputType]): @@ -197,13 +268,33 @@ def _check_for_unset_values(self): ) -def get_fields_from_class( +def extract_fields_from_class( klass: type, arg_type: type[Arg], out_type: type[Out], auto_attribs: bool, ) -> tuple[dict[str, Arg], dict[str, Out]]: - """Parse the input and output fields from a class""" + """Extract the input and output fields from an existing class + + Parameters + ---------- + klass : type + The class to extract the fields from + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + auto_attribs : bool + Whether to assume that all attribute annotations should be interpreted as + fields or not + + Returns + ------- + inputs : dict[str, Arg] + The input fields extracted from the class + outputs : dict[str, Out] + The output fields extracted from the class + """ input_helps, _ = parse_doc_string(klass.__doc__) @@ -269,31 +360,50 @@ def make_task_spec( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), ): + """Create a task specification class and its outputs specification class from the + input and output fields provided to the decorator/function. + + Modifies the class so that its attributes are converted from pydra fields to attrs fields + and then calls `attrs.define` to create an attrs class (dataclass-like). 
+ on + + Parameters + ---------- + task_type : type + The type of the task to be created + inputs : dict[str, Arg] + The input fields of the task + outputs : dict[str, Out] + The output fields of the task + klass : type, optional + The class to be decorated, by default None + name : str, optional + The name of the class, by default + bases : ty.Sequence[type], optional + The base classes for the task specification class, by default () + outputs_bases : ty.Sequence[type], optional + The base classes for the outputs specification class, by default () + + Returns + ------- + klass : type + The class created using the attrs package + """ if name is None and klass is not None: name = klass.__name__ - outputs_klass = type( - "Outputs", - tuple(outputs_bases), - { - o.name: attrs.field( - converter=make_converter(o, f"{name}.Outputs"), - metadata={PYDRA_ATTR_METADATA: o}, - **_get_default(o), - ) - for o in outputs.values() - }, - ) - outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) - outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) - + outputs_klass = make_outputs_spec(outputs, outputs_bases, name) if klass is None or not issubclass(klass, TaskSpec): if name is None: raise ValueError("name must be provided if klass is not") bases = tuple(bases) + # Ensure that TaskSpec is a base class if not any(issubclass(b, TaskSpec) for b in bases): bases = bases + (TaskSpec,) + # If building from a decorated class (as opposed to dynamically from a function + # or shell-template), add any base classes not already in the bases tuple if klass is not None: bases += tuple(c for c in klass.__mro__ if c not in bases + (object,)) + # Create a new class with the TaskSpec as a base class klass = types.new_class( name=name, bases=bases, @@ -303,7 +413,7 @@ def make_task_spec( ), ) else: - # Ensure that the class has it's own annotaitons dict so we can modify it without + # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes klass.__annotations__ = copy(klass.__annotations__) klass.Task = task_type @@ -345,7 +455,53 @@ def make_task_spec( return attrs_klass -def collate_with_helps( +def make_outputs_spec( + outputs: dict[str, Out], bases: ty.Sequence[type], spec_name: str +) -> type[OutputsSpec]: + """Create an outputs specification class and its outputs specification class from the + output fields provided to the decorator/function. + + Creates a new class with attrs fields and then calls `attrs.define` to create an + attrs class (dataclass-like). 
+ + Parameters + ---------- + outputs : dict[str, Out] + The output fields of the task + bases : ty.Sequence[type], optional + The base classes for the outputs specification class, by default () + spec_name : str + The name of the task specification class the outputs are for + + Returns + ------- + klass : type + The class created using the attrs package + """ + if not any(issubclass(b, OutputsSpec) for b in bases): + outputs_bases = bases + (OutputsSpec,) + if reserved_names := [n for n in outputs if n in RESERVED_OUTPUT_NAMES]: + raise ValueError( + f"{reserved_names} are reserved and cannot be used for output field names" + ) + outputs_klass = type( + spec_name + "Outputs", + tuple(outputs_bases), + { + o.name: attrs.field( + converter=make_converter(o, f"{spec_name}.Outputs"), + metadata={PYDRA_ATTR_METADATA: o}, + **_get_default(o), + ) + for o in outputs.values() + }, + ) + outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) + outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) + return outputs_klass + + +def ensure_field_objects( arg_type: type[Arg], out_type: type[Out], doc_string: str | None = None, @@ -354,7 +510,33 @@ def collate_with_helps( input_helps: dict[str, str] | None = None, output_helps: dict[str, str] | None = None, ) -> tuple[dict[str, Arg], dict[str, Out]]: - """Assign help strings to the appropriate inputs and outputs""" + """Converts dicts containing input/output types into input/output, including any + help strings to the appropriate inputs and outputs + + Parameters + ---------- + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + doc_string : str, optional + The docstring of the function or class + inputs : dict[str, Arg | type], optional + The inputs to the function or class + outputs : dict[str, Out | type], optional + The outputs of the function or class + input_helps : dict[str, str], optional + The help strings for the inputs + output_helps : dict[str, str], optional + The help strings for the outputs + + Returns + ------- + inputs : dict[str, Arg] + The input fields with help strings added + outputs : dict[str, Out] + The output fields with help strings added + """ for input_name, arg in list(inputs.items()): if isinstance(arg, Arg): @@ -403,7 +585,24 @@ def collate_with_helps( def make_converter( field: Field, interface_name: str, field_type: ty.Type | None = None -): +) -> ty.Callable[..., ty.Any]: + """Makes an attrs converter for the field, combining type checking with any explicit + converters + + Parameters + ---------- + field : Field + The field to make the converter for + interface_name : str + The name of the interface the field is part of + field_type : type, optional + The type of the field, by default None + + Returns + ------- + converter : callable + The converter for the field + """ if field_type is None: field_type = field.type checker_label = f"'{field.name}' field of {interface_name} interface" @@ -425,7 +624,22 @@ def make_converter( return converter -def make_validator(field: Field, interface_name: str): +def make_validator(field: Field, interface_name: str) -> ty.Callable[..., None] | None: + """Makes an attrs validator for the field, combining allowed values and any explicit + validators + + Parameters + ---------- + field : Field + The field to make the validator for + interface_name : str + The name of the interface the field is part of + + Returns + ------- + validator : callable + The validator for the field + """ validators = 
[] if field.allowed_values: validators.append(allowed_values_validator) @@ -458,7 +672,28 @@ def extract_function_inputs_and_outputs( outputs: list[str | Out] | dict[str, Out | type] | type | None = None, ) -> tuple[dict[str, type | Arg], dict[str, type | Out]]: """Extract input output types and output names from the function source if they - aren't explicitly""" + aren't explicitly + + Parameters + ---------- + function : callable + The function to extract the inputs and outputs from + arg_type : type + The type of the input fields + out_type : type + The type of the output fields + inputs : list[str | Arg] | dict[str, Arg | type] | None + The inputs to the function + outputs : list[str | Out] | dict[str, Out | type] | type | None + The outputs of the function + + Returns + ------- + inputs : dict[str, Arg] + The input fields extracted from the function + outputs : dict[str, Out] + The output fields extracted from the function + """ # if undefined_symbols := get_undefined_symbols( # function, exclude_signature_type_hints=True, ignore_decorator=True # ): diff --git a/pydra/design/python.py b/pydra/design/python.py index fa540cae42..154c5f8a6d 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -5,13 +5,13 @@ from .base import ( Arg, Out, - collate_with_helps, + ensure_field_objects, make_task_spec, TaskSpec, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, - get_fields_from_class, + extract_fields_from_class, ) @@ -109,7 +109,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: function = klass.function name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) - parsed_inputs, parsed_outputs = get_fields_from_class( + parsed_inputs, parsed_outputs = extract_fields_from_class( klass, arg, out, auto_attribs ) else: @@ -125,7 +125,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: ) name = function.__name__ - parsed_inputs, parsed_outputs = collate_with_helps( + parsed_inputs, parsed_outputs = ensure_field_objects( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c14a514d84..714f67e80f 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -15,8 +15,8 @@ Arg, Out, check_explicit_fields_are_none, - get_fields_from_class, - collate_with_helps, + extract_fields_from_class, + ensure_field_objects, TaskSpec, make_task_spec, EMPTY, @@ -242,7 +242,7 @@ def make( ) from None class_name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) - parsed_inputs, parsed_outputs = get_fields_from_class( + parsed_inputs, parsed_outputs = extract_fields_from_class( klass, arg, out, auto_attribs ) else: @@ -259,7 +259,7 @@ def make( outputs=outputs, ) - parsed_inputs, parsed_outputs = collate_with_helps( + parsed_inputs, parsed_outputs = ensure_field_objects( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 077996a57b..1502ce4f6a 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,8 +1,8 @@ from operator import attrgetter import pytest import attrs -from pydra.engine.workflow import Workflow, WORKFLOW_LZIN -from pydra.engine.specs import LazyField +from pydra.engine.workflow import Workflow +from pydra.engine.specs import LazyInField, LazyOutField import typing as ty from pydra.design import shell, python, workflow, list_fields, TaskSpec from fileformats import video, image @@ -44,9 +44,7 @@ def 
MyTestWorkflow(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyField( - name="Mul", field="out", type=ty.Any, type_checked=True - ) + assert wf.outputs.out == LazyOutField(name="Mul", field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -108,8 +106,8 @@ def MyTestShellWorkflow( wf = Workflow.construct(workflow_spec) assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark - assert wf.outputs.output_video == LazyField( - name="resize", field="out_video", type=video.Mp4, type_checked=True + assert wf.outputs.output_video == LazyOutField( + name="resize", field="out_video", type=video.Mp4 ) assert list(wf.node_names) == ["add_watermark", "resize"] @@ -170,9 +168,7 @@ class Outputs: wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyField( - name="Mul", field="out", type=ty.Any, type_checked=True - ) + assert wf.outputs.out == LazyOutField(name="Mul", field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -221,11 +217,11 @@ def MyTestShellWorkflow( watermark=watermark, ) wf = Workflow.construct(workflow_spec) - assert wf["add_watermark"].inputs.in_video == LazyField( - name=WORKFLOW_LZIN, field="input_video", type=video.Mp4, type_checked=True + assert wf["add_watermark"].inputs.in_video == LazyInField( + field="input_video", type=video.Mp4 ) - assert wf["add_watermark"].inputs.watermark == LazyField( - name=WORKFLOW_LZIN, field="watermark", type=image.Png, type_checked=True + assert wf["add_watermark"].inputs.watermark == LazyInField( + field="watermark", type=image.Png ) @@ -281,11 +277,9 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyField( - name="Mul", field="out", type=float, type_checked=True - ) - assert wf.outputs.out2 == LazyField( - name="division", field="divided", type=ty.Any, type_checked=True + assert wf.outputs.out1 == LazyOutField(name="Mul", field="out", type=float) + assert wf.outputs.out2 == LazyOutField( + name="division", field="divided", type=ty.Any ) assert list(wf.node_names) == ["addition", "Mul", "division"] @@ -329,12 +323,8 @@ def MyTestWorkflow(a: int, b: float): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyField( - name="Mul", field="out", type=ty.Any, type_checked=True - ) - assert wf.outputs.out2 == LazyField( - name="Add", field="out", type=ty.Any, type_checked=True - ) + assert wf.outputs.out1 == LazyOutField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out2 == LazyOutField(name="Add", field="out", type=ty.Any) assert list(wf.node_names) == ["Add", "Mul"] @@ -350,18 +340,14 @@ def Sum(x: list[float]) -> float: @workflow.define def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: - - wf = workflow.this() - mul = wf.add(Mul()) - # We could avoid having to specify the splitter and combiner on a separate - # line by making 'split' and 'combine' reserved keywords for Outputs class attrs - wf["Mul"].split(x=a, y=b).combine("a") - sum = wf.add(Sum(x=mul.out)) + mul = workflow.add(Mul()).split(x=a, y=b).combine("x") + sum = workflow.add(Sum(x=mul.out)) return sum.out wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 
100.0])) - assert wf["Mul"]._state.splitter == ["x", "y"] - assert wf["Mul"]._state.combiner == ["x"] + assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] + assert wf["Mul"].combiner == ["Mul.x"] + assert wf.outputs.out == LazyOutField(name="Sum", field="out", type=list[float]) def test_workflow_split_after_access_fail(): @@ -381,10 +367,9 @@ def Mul(x, y): @workflow.define def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: - wf = workflow.this() - add = wf.add(Add()) - mul = wf.add(Mul(x=add.out, y=2.0)) # << Add.out is accessed here - wf["Add"].split(x=a, y=b).combine("x") + add = workflow.add(Add()) + mul = workflow.add(Mul(x=add.out, y=2.0)) # << Add.out is accessed here + add.split(x=a, y=b).combine("x") return mul.out with pytest.raises(RuntimeError, match="Outputs .* have already been accessed"): diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 506260cbbb..bf804944ed 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -6,13 +6,13 @@ from .base import ( Arg, Out, - collate_with_helps, + ensure_field_objects, make_task_spec, TaskSpec, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, - get_fields_from_class, + extract_fields_from_class, ) @@ -116,7 +116,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: constructor = klass.constructor name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) - parsed_inputs, parsed_outputs = get_fields_from_class( + parsed_inputs, parsed_outputs = extract_fields_from_class( klass, arg, out, auto_attribs ) else: @@ -132,7 +132,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: ) name = constructor.__name__ - parsed_inputs, parsed_outputs = collate_with_helps( + parsed_inputs, parsed_outputs = ensure_field_objects( arg_type=arg, out_type=out, inputs=inferred_inputs, diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 57fbb80833..f1430aad41 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -8,12 +8,13 @@ from copy import copy from glob import glob import attr +from typing_extensions import Self from fileformats.core import FileSet from fileformats.generic import ( File, Directory, ) -import pydra +import pydra.engine from .helpers_file import template_update_single from pydra.utils.hash import hash_function, Cache @@ -694,7 +695,7 @@ def __getattr__(self, name): # "_myarbitrarytask" combined_upstreams = set() if self._task.state: - for scalar in LazyField.sanitize_splitter( + for scalar in LazyField.normalize_splitter( self._task.state.splitter, strip_previous=False ): for field in scalar: @@ -742,7 +743,7 @@ def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: splits = set() if splitter: # Ensure that splits is of tuple[tuple[str, ...], ...] 
form - splitter = LazyField.sanitize_splitter(splitter) + splitter = LazyField.normalize_splitter(splitter) if splitter: splits.add(splitter) for inpt in attr.asdict(self._task.inputs, recurse=False).values(): @@ -817,14 +818,16 @@ class LazyField(ty.Generic[T]): factory=frozenset, converter=frozenset ) cast_from: ty.Optional[ty.Type[ty.Any]] = None - type_checked: bool = False + # type_checked will be set to False after it is created but defaults to True here for + # ease of testing + type_checked: bool = True def __bytes_repr__(self, cache): yield type(self).__name__.encode() yield self.name.encode() yield self.field.encode() - def cast(self, new_type: TypeOrAny) -> "LazyField": + def cast(self, new_type: TypeOrAny) -> Self: """ "casts" the lazy field to a new type Parameters @@ -845,7 +848,7 @@ def cast(self, new_type: TypeOrAny) -> "LazyField": cast_from=self.cast_from if self.cast_from else self.type, ) - def split(self, splitter: Splitter) -> "LazyField": + def split(self, splitter: Splitter) -> Self: """ "Splits" the lazy field over an array of nodes by replacing the sequence type of the lazy field with StateArray to signify that it will be "split" across @@ -858,7 +861,7 @@ def split(self, splitter: Splitter) -> "LazyField": TypeParser, ) # pylint: disable=import-outside-toplevel - splits = self.splits | set([LazyField.sanitize_splitter(splitter)]) + splits = self.splits | set([LazyField.normalize_splitter(splitter)]) # Check to see whether the field has already been split over the given splitter if splits == self.splits: return self @@ -884,8 +887,10 @@ def split(self, splitter: Splitter) -> "LazyField": splits=splits, ) + # def combine(self, combiner: str | list[str]) -> Self: + @classmethod - def sanitize_splitter( + def normalize_splitter( cls, splitter: Splitter, strip_previous: bool = True ) -> ty.Tuple[ty.Tuple[str, ...], ...]: """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] form @@ -918,11 +923,14 @@ def _apply_cast(self, value): return value +@attr.s(auto_attribs=True, kw_only=True) class LazyInField(LazyField[T]): + + name: str = None attr_type = "input" def get_value( - self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None + self, wf: "pydra.engine.workflow.Workflow", state_index: ty.Optional[int] = None ) -> ty.Any: """Return the value of a lazy field. diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow.py index adf2193578..475c60d338 100644 --- a/pydra/engine/workflow.py +++ b/pydra/engine/workflow.py @@ -1,18 +1,18 @@ import typing as ty -import enum from copy import copy, deepcopy from operator import itemgetter from typing_extensions import Self import attrs -from pydra.design.base import list_fields, TaskSpec -from pydra.engine.specs import LazyField, StateArray +from pydra.design.base import list_fields, TaskSpec, OutputsSpec +from pydra.engine.specs import LazyField, LazyInField, LazyOutField, StateArray from pydra.utils.hash import hash_function +from pydra.utils.typing import TypeParser from . import helpers_state as hlpst from .helpers import ensure_list from . 
import state -OutputType = ty.TypeVar("OutputType") +OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) @attrs.define @@ -29,52 +29,41 @@ class Node(ty.Generic[OutputType]): name: str inputs: TaskSpec[OutputType] - _splitter: str | list[str] | tuple[str] | None = None - _combiner: list | str = None _workflow: "Workflow" = None _lzout: OutputType | None = None _state: state.State | None = None - _cont_dim: dict[str, int] | None = None + _cont_dim: dict[str, int] | None = ( + None # QUESTION: should this be included in the state? + ) @property def lzout(self) -> OutputType: """The output spec of the node populated with lazy fields""" if self._lzout is not None: return self._lzout - state_array_depth = 0 - for inpt_name, inpt_val in attrs.asdict(self.inputs).items(): + combined_splitter = set() + for inpt_name, inpt_val in attrs.asdict(self.inputs, recurse=False).items(): if isinstance(inpt_val, LazyField): - tp = inpt_val.type - depth = 0 - while isinstance(tp, StateArray): - tp = ty.get_args(tp)[0] - depth += 1 - # FIXME: This won't be able to differentiate between state arrays - # from different splits and those from the same split, we might need to - # keep track of that in the LazyField... (am I right about this??) - state_array_depth = max(state_array_depth, depth) + combined_splitter.update(inpt_val.splits) lazy_fields = {} for field in list_fields(self.inputs.Outputs): - # TODO: need to reimplement the full spliter/combiner logic here - if self._splitter and field.name in self._splitter: - if field.name in self._combiner: - type_ = list[field.type] - else: - type_ = StateArray(field.type) - else: - type_ = field.type - for _ in range(state_array_depth): + type_ = field.type + # Wrap types of lazy outputs in StateArray types if the input fields are split + # over state values + for _ in range(len(combined_splitter)): type_ = StateArray[type_] - lazy_fields[field.name] = LazyField( + lazy_fields[field.name] = LazyOutField( name=self.name, field=field.name, type=type_, + splits=frozenset(iter(combined_splitter)), ) outputs = self.inputs.Outputs(**lazy_fields) # Flag the output lazy fields as being not typed checked (i.e. 
assigned to another # node's inputs) yet for outpt in attrs.asdict(outputs, recurse=False).values(): outpt.type_checked = False + outputs._node = self self._lzout = outputs return outputs @@ -107,7 +96,7 @@ def split( Returns ------- - self : TaskBase + self : TaskSpec a reference to the task """ self._check_if_outputs_have_been_used() @@ -157,6 +146,16 @@ def split( self.inputs = attrs.evolve(self.inputs, **new_inputs) if not self._state or splitter != self._state.splitter: self._set_state(splitter) + # Wrap types of lazy outputs in StateArray types + split_depth = len(LazyField.normalize_splitter(splitter)) + outpt_lf: LazyOutField + for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): + assert not outpt_lf.type_checked + outpt_type = outpt_lf.type + for d in range(split_depth): + outpt_type = StateArray[outpt_type] + outpt_lf.type = outpt_type + outpt_lf.splits = frozenset(iter(self._state.splitter)) return self def combine( @@ -180,12 +179,19 @@ def combine( Returns ------- - self : TaskBase + self : TaskSpec a reference to the task """ if not isinstance(combiner, (str, list)): raise Exception("combiner has to be a string or a list") combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) + if not_split := [ + c for c in combiner if not any(c in s for s in self._state.splitter) + ]: + raise ValueError( + f"Combiner fields {not_split} for Node {self.name!r} are not in the " + f"splitter fields {self._state.splitter}" + ) if ( self._state and self._state.combiner @@ -204,6 +210,24 @@ def combine( self._state.fut_combiner = combiner else: # self.state and not self.state.combiner self._set_state(splitter=self._state.splitter, combiner=combiner) + # Wrap types of lazy outputs in StateArray types + norm_splitter = LazyField.normalize_splitter(self._state.splitter) + remaining_splits = [ + s for s in norm_splitter if not any(c in s for c in combiner) + ] + combine_depth = len(norm_splitter) - len(remaining_splits) + outpt_lf: LazyOutField + for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): + assert not outpt_lf.type_checked + outpt_type, split_depth = TypeParser.strip_splits(outpt_lf.type) + assert split_depth >= combine_depth, ( + f"Attempting to combine a field that has not been split enough times: " + f"{outpt_lf.name} ({outpt_lf.type}), {self._state.splitter} -> {combiner}" + ) + outpt_lf.type = list[outpt_type] + for _ in range(split_depth - combine_depth): + outpt_lf.type = StateArray[outpt_lf.type] + outpt_lf.splits = frozenset(iter(remaining_splits)) return self def _set_state(self, splitter, combiner=None): @@ -243,6 +267,18 @@ def cont_dim(self, cont_dim): else: self._cont_dim = cont_dim + @property + def splitter(self): + if not self._state: + return None + return self._state.splitter + + @property + def combiner(self): + if not self._state: + return None + return self._state.combiner + def _check_if_outputs_have_been_used(self): used = [] if self._lzout: @@ -320,8 +356,7 @@ def construct( setattr( lazy_spec, lzy_inpt.name, - LazyField( - name=WORKFLOW_LZIN, + LazyInField( field=lzy_inpt.name, type=lzy_inpt.type, ), @@ -332,23 +367,28 @@ def construct( cls._under_construction = wf try: # Call the user defined constructor to set the outputs - output_values = constructor(**input_values) + output_lazy_fields = constructor(**input_values) # Check to see whether any mandatory inputs are not set for node in wf.nodes: node.inputs._check_for_unset_values() # Check that the outputs are set correctly, either directly by the constructor # or via 
returned values that can be zipped with the output names - if output_values: - if not isinstance(output_values, (list, tuple)): - output_values = [output_values] + if output_lazy_fields: + if not isinstance(output_lazy_fields, (list, tuple)): + output_lazy_fields = [output_lazy_fields] output_fields = list_fields(spec.Outputs) - if len(output_values) != len(output_fields): + if len(output_lazy_fields) != len(output_fields): raise ValueError( f"Expected {len(output_fields)} outputs, got " - f"{len(output_values)} ({output_values})" + f"{len(output_lazy_fields)} ({output_lazy_fields})" ) - for outpt, oupt_val in zip(output_fields, output_values): - setattr(outputs, outpt.name, oupt_val) + for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + if TypeParser.get_origin(outpt_lf.type) is StateArray: + # Automatically combine any uncombined state arrays into lists + tp, _ = TypeParser.strip_splits(outpt_lf.type) + outpt_lf.type = list[tp] + outpt_lf.splits = frozenset() + setattr(outputs, outpt.name, outpt_lf) else: if unset_outputs := [ a @@ -398,14 +438,3 @@ def under_construction(cls) -> "Workflow[ty.Any]": _under_construction: "Workflow[ty.Any]" = None _constructed: dict[int, "Workflow[ty.Any]"] = {} - - -class _WorkflowLzin(enum.Enum): - - WORKFLOW_LZIN = enum.auto() - - def __repr__(self): - return "WORKFLOW_LZIN" - - -WORKFLOW_LZIN = _WorkflowLzin.WORKFLOW_LZIN diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index ba74d02030..c9f1b9b592 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -960,8 +960,6 @@ def strip_splits(cls, type_: ty.Type[ty.Any]) -> ty.Tuple[ty.Type, int]: ---------- type_ : ty.Type[ty.Any] the type to list the nested sequences of - only_splits : bool, optional - whether to only return nested splits, not all sequence types Returns ------- From d8250562fd2ba637d20f580a0d06126f5b669839 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 4 Dec 2024 17:29:35 +1100 Subject: [PATCH 046/342] updated python versions to include 3.13 and drop <3.10 --- .github/workflows/testdask.yml | 2 +- .github/workflows/testpydra.yml | 2 +- .github/workflows/testsingularity.yml | 2 +- .github/workflows/testslurm.yml | 2 +- pyproject.toml | 5 ++--- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml index 7ca8a29f51..55cc0e665a 100644 --- a/.github/workflows/testdask.yml +++ b/.github/workflows/testdask.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python-version: ['3.9', '3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12', '3.13'] fail-fast: false runs-on: ${{ matrix.os }} diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 3ead2e3a6b..db4d61f046 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -50,7 +50,7 @@ jobs: strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.10', '3.11', '3.12', '3.13'] install: ['wheel'] include: - os: 'ubuntu-latest' diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 6cb597cdf8..f0d864a9e1 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, "3.10", "3.11"] + python-version: ['3.10', '3.11', '3.12', '3.13'] fail-fast: False steps: diff --git 
a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index 0e1d17f09b..a518c10951 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -14,7 +14,7 @@ jobs: build: strategy: matrix: - python-version: [3.8.16, 3.9.16, 3.10.9, 3.11.5] + python-version: [3.10.9, 3.11.5] fail-fast: false runs-on: ubuntu-latest env: diff --git a/pyproject.toml b/pyproject.toml index 150e6b8dba..871b9efc88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "hatchling.build" name = "pydra" description = "Pydra dataflow engine" readme = "README.rst" -requires-python = ">=3.8" +requires-python = ">=3.10" dependencies = [ "attrs >=19.1.0", "cloudpickle >=2.0.0", @@ -32,11 +32,10 @@ classifiers = [ "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", ] dynamic = ["version"] From 03e69517813f422be9205eac9718a54de9411d07 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 5 Dec 2024 13:57:04 +1100 Subject: [PATCH 047/342] resolved gnarly circular imports --- pydra/design/__init__.py | 4 +- pydra/design/base.py | 156 +- pydra/design/python.py | 2 +- pydra/design/shell.py | 62 +- pydra/design/tests/test_python.py | 3 +- pydra/design/tests/test_shell.py | 6 +- pydra/design/tests/test_workflow.py | 58 +- pydra/design/workflow.py | 36 +- pydra/engine/__init__.py | 2 - pydra/engine/audit.py | 9 +- pydra/engine/core.py | 328 +-- pydra/engine/helpers.py | 67 +- pydra/engine/specs.py | 1761 ++++++++--------- pydra/engine/state.py | 9 +- pydra/engine/task.py | 12 +- pydra/engine/workflow/__init__.py | 0 pydra/engine/workflow/base.py | 178 ++ pydra/engine/workflow/lazy.py | 250 +++ .../engine/{workflow.py => workflow/node.py} | 109 +- pydra/utils/typing.py | 51 +- 20 files changed, 1653 insertions(+), 1450 deletions(-) create mode 100644 pydra/engine/workflow/__init__.py create mode 100644 pydra/engine/workflow/base.py create mode 100644 pydra/engine/workflow/lazy.py rename pydra/engine/{workflow.py => workflow/node.py} (83%) diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py index 9b86627949..0cfe94caa9 100644 --- a/pydra/design/__init__.py +++ b/pydra/design/__init__.py @@ -1,6 +1,6 @@ -from .base import TaskSpec, list_fields from . import python from . import shell +from . 
import workflow -__all__ = ["TaskSpec", "list_fields", "python", "shell"] +__all__ = ["python", "shell", "workflow"] diff --git a/pydra/design/base.py b/pydra/design/base.py index 64e0ab8510..0fbf79ac82 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -5,33 +5,35 @@ import enum from pathlib import Path from copy import copy -from typing_extensions import Self import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union - -# from pydra.utils.misc import get_undefined_symbols -from pydra.engine.helpers import from_list_if_single, ensure_list -from pydra.engine.specs import ( - LazyField, +from pydra.engine.helpers import ( + from_list_if_single, + ensure_list, + PYDRA_ATTR_METADATA, + list_fields, +) +from pydra.utils.typing import ( MultiInputObj, MultiInputFile, MultiOutputObj, MultiOutputFile, ) -from pydra.engine.core import Task, AuditFlag +from pydra.engine.workflow.lazy import LazyField +if ty.TYPE_CHECKING: + from pydra.engine.specs import OutputsSpec + from pydra.engine.core import Task + __all__ = [ "Field", "Arg", "Out", - "TaskSpec", - "OutputsSpec", "ensure_field_objects", "make_task_spec", - "list_fields", ] RESERVED_OUTPUT_NAMES = ("split", "combine") @@ -154,120 +156,6 @@ class Out(Field): pass -class OutputsSpec: - """Base class for all output specifications""" - - def split( - self, - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, - /, - overwrite: bool = False, - cont_dim: ty.Optional[dict] = None, - **inputs, - ) -> Self: - """ - Run this task parametrically over lists of split inputs. - - Parameters - ---------- - splitter : str or list[str] or tuple[str] or None - the fields which to split over. If splitting over multiple fields, lists of - fields are interpreted as outer-products and tuples inner-products. If None, - then the fields to split are taken from the keyword-arg names. - overwrite : bool, optional - whether to overwrite an existing split on the node, by default False - cont_dim : dict, optional - Container dimensions for specific inputs, used in the splitter. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. - **inputs - fields to split over, will automatically be wrapped in a StateArray object - and passed to the node inputs - - Returns - ------- - self : TaskBase - a reference to the task - """ - self._node.split(splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs) - return self - - def combine( - self, - combiner: ty.Union[ty.List[str], str], - overwrite: bool = False, # **kwargs - ) -> Self: - """ - Combine inputs parameterized by one or more previous tasks. - - Parameters - ---------- - combiner : list[str] or str - the field or list of inputs to be combined (i.e. 
not left split) after the - task has been run - overwrite : bool - whether to overwrite an existing combiner on the node - **kwargs : dict[str, Any] - values for the task that will be "combined" before they are provided to the - node - - Returns - ------- - self : Self - a reference to the outputs object - """ - self._node.combine(combiner, overwrite=overwrite) - return self - - -OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) - - -class TaskSpec(ty.Generic[OutputType]): - """Base class for all task specifications""" - - Task: ty.Type[Task] - - def __call__( - self, - name: str | None = None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - inputs: ty.Text | File | dict[str, ty.Any] | None = None, - cont_dim=None, - messenger_args=None, - messengers=None, - rerun=False, - **kwargs, - ): - self._check_for_unset_values() - task = self.Task( - self, - name=name, - audit_flags=audit_flags, - cache_dir=cache_dir, - cache_locations=cache_locations, - inputs=inputs, - cont_dim=cont_dim, - messenger_args=messenger_args, - messengers=messengers, - rerun=rerun, - ) - return task(**kwargs) - - def _check_for_unset_values(self): - if unset := [ - k - for k, v in attrs.asdict(self, recurse=False).items() - if v is attrs.NOTHING - ]: - raise ValueError( - f"The following values {unset} in the {self!r} interface need to be set " - "before the workflow can be constructed" - ) - - def extract_fields_from_class( klass: type, arg_type: type[Arg], @@ -352,7 +240,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_spec( - task_type: type[Task], + task_type: type["Task"], inputs: dict[str, Arg], outputs: dict[str, Out], klass: type | None = None, @@ -389,6 +277,8 @@ def make_task_spec( klass : type The class created using the attrs package """ + from pydra.engine.specs import TaskSpec + if name is None and klass is not None: name = klass.__name__ outputs_klass = make_outputs_spec(outputs, outputs_bases, name) @@ -457,7 +347,7 @@ def make_task_spec( def make_outputs_spec( outputs: dict[str, Out], bases: ty.Sequence[type], spec_name: str -) -> type[OutputsSpec]: +) -> type["OutputsSpec"]: """Create an outputs specification class and its outputs specification class from the output fields provided to the decorator/function. 
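For orientation while reading this hunk: the outputs class that `make_outputs_spec` assembles dynamically (via `type(...)` plus `attrs.define`) is roughly equivalent to hand-writing an attrs class along the lines sketched below. The `MyTaskOutputs` name, the single `out` field and the `float` converter are illustrative placeholders only; in pydra the class additionally inherits from `OutputsSpec` and uses the converter returned by `make_converter`.

```
import attrs

PYDRA_ATTR_METADATA = "__PYDRA_METADATA__"  # same key defined in pydra/engine/helpers.py


@attrs.define(kw_only=True)
class MyTaskOutputs:  # pydra also adds OutputsSpec to the bases tuple
    out: float = attrs.field(
        converter=float,  # stands in for make_converter(out_field, "MyTask.Outputs")
        metadata={PYDRA_ATTR_METADATA: None},  # would hold the original Out field object
    )


# list_fields() in pydra/engine/helpers.py later recovers the Out objects by
# looking up PYDRA_ATTR_METADATA in each attrs field's metadata
print(attrs.fields(MyTaskOutputs).out.metadata[PYDRA_ATTR_METADATA])
```

Stashing the rich `Out` objects under a metadata key is what lets the rest of the engine round-trip between plain attrs classes and the field definitions they were generated from.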
@@ -478,6 +368,8 @@ def make_outputs_spec( klass : type The class created using the attrs package """ + from pydra.engine.specs import OutputsSpec + if not any(issubclass(b, OutputsSpec) for b in bases): outputs_bases = bases + (OutputsSpec,) if reserved_names := [n for n in outputs if n in RESERVED_OUTPUT_NAMES]: @@ -880,16 +772,6 @@ def split_block(string: str) -> ty.Generator[str, None, None]: yield block.strip() -def list_fields(interface: TaskSpec) -> list[Field]: - if not attrs.has(interface): - return [] - return [ - f.metadata[PYDRA_ATTR_METADATA] - for f in attrs.fields(interface) - if PYDRA_ATTR_METADATA in f.metadata - ] - - def check_explicit_fields_are_none(klass, inputs, outputs): if inputs is not None: raise ValueError( @@ -918,5 +800,3 @@ def nothing_factory(): white_space_re = re.compile(r"\s+") - -PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" diff --git a/pydra/design/python.py b/pydra/design/python.py index 154c5f8a6d..b25d36e010 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -2,12 +2,12 @@ import inspect import attrs from pydra.engine.task import FunctionTask +from pydra.engine.specs import TaskSpec from .base import ( Arg, Out, ensure_field_objects, make_task_spec, - TaskSpec, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 714f67e80f..21d5d435c9 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -11,20 +11,20 @@ from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError +from pydra.engine.specs import TaskSpec from .base import ( Arg, Out, check_explicit_fields_are_none, extract_fields_from_class, ensure_field_objects, - TaskSpec, make_task_spec, EMPTY, ) -from pydra.utils.typing import is_fileset_or_union -from pydra.engine.specs import MultiInputObj +from pydra.utils.typing import is_fileset_or_union, MultiInputObj from pydra.engine.task import ShellCommandTask + __all__ = ["arg", "out", "outarg", "define"] @@ -180,7 +180,6 @@ class outarg(Out, arg): If provided, the field is treated also as an output field and it is added to the output spec. The template can use other fields, e.g. {file1}. Used in order to create an output specification. - """ path_template: str | None = attrs.field(default=None) @@ -204,7 +203,35 @@ def define( auto_attribs: bool = True, name: str | None = None, ) -> TaskSpec: - """Create a shell command interface + """Create a task specification for a shell command. Can be used either as a decorator on + the "canonical" dataclass-form of a task specification or as a function that takes a + "shell-command template string" of the form + + ``` + shell.define("command --output ") + ``` + + Fields are inferred from the template if not provided. In the template, inputs are + specified with `` and outputs with ``. + + ``` + my_command + ``` + + The types of the fields can be specified using their MIME like (see fileformats.core.from_mime), e.g. + + ``` + my_command + ``` + + The template can also specify options with `-` or `--` followed by the option name + and arguments with ``. The type is optional and will default to + `generic/fs-object` if not provided for arguments and `field/text` for + options. The file-formats namespace can be dropped for generic and field formats, e.g. 
+ + ``` + another-command --output + ``` Parameters ---------- @@ -221,6 +248,11 @@ def define( as they appear in the template name: str | None The name of the returned class + + Returns + ------- + TaskSpec + The interface for the shell command """ def make( @@ -331,9 +363,10 @@ def parse_command_line_template( outputs: list[str | Out] | dict[str, Out | type] | None = None, ) -> ty.Tuple[str, dict[str, Arg | type], dict[str, Out | type]]: """Parses a command line template into a name and input and output fields. Fields - are inferred from the template if not provided, where inputs are specified with `` - and outputs with ``. The types of the fields can be specified using their - MIME like (see fileformats.core.from_mime), e.g. + are inferred from the template if not explicitly provided. + + In the template, inputs are specified with `` and outputs with ``. + The types of the fields can be specified using their MIME like (see fileformats.core.from_mime), e.g. ``` my_command @@ -345,7 +378,7 @@ def parse_command_line_template( options. The file-formats namespace can be dropped for generic and field formats, e.g. ``` - another-command --output + another-command --output ``` Parameters @@ -365,6 +398,13 @@ def parse_command_line_template( The input fields of the command line template outputs : dict[str, Out | type] The output fields of the command line template + + Raises + ------ + ValueError + If an unknown token is found in the command line template + TypeError + If an unknown type is found in the command line template """ if isinstance(inputs, list): inputs = {arg.name: arg for arg in inputs} @@ -437,9 +477,9 @@ def from_type_str(type_str) -> type: try: type_ = from_mime(f"generic/{tp}") except FormatRecognitionError: - raise ValueError( + raise TypeError( f"Found unknown type, {tp!r}, in command template: {template!r}" - ) + ) from None types.append(type_) if len(types) == 2 and types[1] == "...": type_ = MultiInputObj[types[0]] diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 8939539d58..54dcd0fda4 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -3,7 +3,8 @@ from decimal import Decimal import attrs import pytest -from pydra.design import list_fields, TaskSpec +from pydra.engine.helpers import list_fields +from pydra.engine.specs import TaskSpec from pydra.design import python from pydra.engine.task import FunctionTask diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index cf9ea7db8a..6d4dc3cac5 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -3,10 +3,12 @@ import attrs import pytest import cloudpickle as cp -from pydra.design import shell, TaskSpec, list_fields +from pydra.design import shell +from pydra.engine.helpers import list_fields +from pydra.engine.specs import TaskSpec from fileformats.generic import File, Directory, FsObject from fileformats import text, image -from pydra.engine.specs import MultiInputObj +from pydra.utils.typing import MultiInputObj def test_interface_template(): diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 1502ce4f6a..9311ddb601 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,10 +1,12 @@ from operator import attrgetter import pytest import attrs -from pydra.engine.workflow import Workflow -from pydra.engine.specs import LazyInField, LazyOutField +from pydra.engine.workflow.base import Workflow +from 
pydra.engine.workflow.lazy import LazyInField, LazyOutField import typing as ty -from pydra.design import shell, python, workflow, list_fields, TaskSpec +from pydra.design import shell, python, workflow +from pydra.engine.helpers import list_fields +from pydra.engine.specs import TaskSpec from fileformats import video, image @@ -44,7 +46,7 @@ def MyTestWorkflow(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -107,7 +109,7 @@ def MyTestShellWorkflow( assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark assert wf.outputs.output_video == LazyOutField( - name="resize", field="out_video", type=video.Mp4 + node=wf["resize"], field="out_video", type=video.Mp4 ) assert list(wf.node_names) == ["add_watermark", "resize"] @@ -168,7 +170,7 @@ class Outputs: wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(name="Mul", field="out", type=ty.Any) + assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -218,10 +220,10 @@ def MyTestShellWorkflow( ) wf = Workflow.construct(workflow_spec) assert wf["add_watermark"].inputs.in_video == LazyInField( - field="input_video", type=video.Mp4 + node=wf, field="input_video", type=video.Mp4 ) assert wf["add_watermark"].inputs.watermark == LazyInField( - field="watermark", type=image.Png + node=wf, field="watermark", type=image.Png ) @@ -277,9 +279,9 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyOutField(name="Mul", field="out", type=float) + assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=float) assert wf.outputs.out2 == LazyOutField( - name="division", field="divided", type=ty.Any + node=wf["division"], field="divided", type=ty.Any ) assert list(wf.node_names) == ["addition", "Mul", "division"] @@ -323,12 +325,12 @@ def MyTestWorkflow(a: int, b: float): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyOutField(name="Mul", field="out", type=ty.Any) - assert wf.outputs.out2 == LazyOutField(name="Add", field="out", type=ty.Any) + assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + assert wf.outputs.out2 == LazyOutField(node=wf["Add"], field="out", type=ty.Any) assert list(wf.node_names) == ["Add", "Mul"] -def test_workflow_split_combine(): +def test_workflow_split_combine1(): @python.define def Mul(x: float, y: float) -> float: @@ -347,7 +349,35 @@ def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] assert wf["Mul"].combiner == ["Mul.x"] - assert wf.outputs.out == LazyOutField(name="Sum", field="out", type=list[float]) + assert wf.outputs.out == LazyOutField(node=wf["Sum"], field="out", type=list[float]) + + +def test_workflow_split_combine2(): + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Add(x: float, 
y: float) -> float: + return x + y + + @python.define + def Sum(x: list[float]) -> float: + return sum(x) + + @workflow.define + def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: + mul = workflow.add(Mul()).split(x=a, y=b) + add = workflow.add(Add(x=mul.out, y=c)).combine("Mul.x") + sum = workflow.add(Sum(x=add.out)) + return sum.out + + wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0], c=2.0)) + assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] + assert wf["Mul"].combiner == ["Mul.x"] + assert wf["Add"].lzout.out.splits == frozenset(["Mul.x"]) + assert wf.outputs.out == LazyOutField(node=wf["Sum"], field="out", type=list[float]) def test_workflow_split_after_access_fail(): diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index bf804944ed..75ac13197f 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -2,18 +2,18 @@ import inspect import attrs from pydra.engine.core import WorkflowTask -from pydra.engine.workflow import Workflow +from pydra.engine.workflow.base import Workflow from .base import ( Arg, Out, ensure_field_objects, make_task_spec, - TaskSpec, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, extract_fields_from_class, ) +from pydra.engine.specs import TaskSpec __all__ = ["define", "add", "this", "arg", "out"] @@ -94,7 +94,8 @@ def define( auto_attribs: bool = True, ) -> TaskSpec: """ - Create an interface for a function or a class. + Create an interface for a function or a class. Can be used either as a decorator on + a constructor function or the "canonical" dataclass-form of a task specification. Parameters ---------- @@ -106,6 +107,11 @@ def define( The outputs of the function or class. auto_attribs : bool Whether to use auto_attribs mode when creating the class. + + Returns + ------- + TaskSpec + The interface for the function or class. """ if lazy is None: lazy = [] @@ -170,10 +176,30 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: def this() -> Workflow: - """Get the workflow currently being constructed.""" + """Get the workflow currently being constructed. + + Returns + ------- + Workflow + The workflow currently being constructed. 
+ """ return Workflow.under_construction def add(task_spec: TaskSpec[OutputType], name: str = None) -> OutputType: - """Add a task to the current workflow.""" + """Add a node to the workflow currently being constructed + + Parameters + ---------- + task_spec : TaskSpec + The specification of the task to add to the workflow as a node + name : str, optional + The name of the node, by default it will be the name of the task specification + class + + Returns + ------- + OutputType + The outputs specification of the node + """ return this().add(task_spec, name=name) diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 6fbd7a0063..24ada3c366 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -1,12 +1,10 @@ """The core of the workflow engine.""" -from .submitter import Submitter import __main__ import logging from ._version import __version__ __all__ = [ - "Submitter", "logger", "check_latest_version", ] diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 2db771da65..8d5695e4e4 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -4,10 +4,8 @@ import json import attr from pydra.utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag -from pydra.utils.hash import hash_function -from .helpers import ensure_list, gather_runtime_info -from .specs import attr_fields from fileformats.core import FileSet +from pydra.utils.hash import hash_function try: import importlib_resources @@ -36,6 +34,8 @@ def __init__(self, audit_flags, messengers, messenger_args, develop=None): If True, the local context.jsonld file is used, otherwise the one from github is used. """ + from .helpers import ensure_list + self.audit_flags = audit_flags self.messengers = ensure_list(messengers) self.messenger_args = messenger_args @@ -93,6 +93,8 @@ def monitor(self): def finalize_audit(self, result): """End auditing.""" if self.audit_check(AuditFlag.RESOURCE): + from .helpers import gather_runtime_info + self.resource_monitor.stop() result.runtime = gather_runtime_info(self.resource_monitor.fname) if self.audit_check(AuditFlag.PROV): @@ -178,6 +180,7 @@ def audit_check(self, flag): def audit_task(self, task): import subprocess as sp + from .helpers import attr_fields label = task.name diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 13123cb02a..4607e23f71 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -21,20 +21,18 @@ from . import helpers_state as hlpst from .specs import ( File, - BaseSpec, + # BaseSpec, RuntimeSpec, Result, - SpecInfo, - LazyIn, - LazyOut, - LazyField, + # SpecInfo, + # LazyIn, + # LazyOut, TaskHook, - attr_fields, - StateArray, ) from .helpers import ( # make_klass, create_checksum, + attr_fields, print_help, load_result, save, @@ -592,140 +590,6 @@ def _collect_outputs(self, output_dir): ) return attr.evolve(output, **self.output_, **other_output) - def split( - self, - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, - overwrite: bool = False, - cont_dim: ty.Optional[dict] = None, - **inputs, - ): - """ - Run this task parametrically over lists of split inputs. - - Parameters - ---------- - splitter : str or list[str] or tuple[str] or None - the fields which to split over. If splitting over multiple fields, lists of - fields are interpreted as outer-products and tuples inner-products. If None, - then the fields to split are taken from the keyword-arg names. 
- overwrite : bool, optional - whether to overwrite an existing split on the node, by default False - cont_dim : dict, optional - Container dimensions for specific inputs, used in the splitter. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. - **split_inputs - fields to split over, will automatically be wrapped in a StateArray object - and passed to the node inputs - - Returns - ------- - self : TaskBase - a reference to the task - """ - if self._lzout: - raise RuntimeError( - f"Cannot split {self} as its output interface has already been accessed" - ) - if splitter is None and inputs: - splitter = list(inputs) - elif splitter: - missing = set(hlpst.unwrap_splitter(splitter)) - set(inputs) - missing = [m for m in missing if not m.startswith("_")] - if missing: - raise ValueError( - f"Split is missing values for the following fields {list(missing)}" - ) - splitter = hlpst.add_name_splitter(splitter, self.name) - # if user want to update the splitter, overwrite has to be True - if self.state and not overwrite and self.state.splitter != splitter: - raise Exception( - "splitter has been already set, " - "if you want to overwrite it - use overwrite=True" - ) - if cont_dim: - for key, vel in cont_dim.items(): - self._cont_dim[f"{self.name}.{key}"] = vel - if inputs: - new_inputs = {} - split_inputs = set( - f"{self.name}.{n}" if "." not in n else n - for n in hlpst.unwrap_splitter(splitter) - if not n.startswith("_") - ) - for inpt_name, inpt_val in inputs.items(): - new_val: ty.Any - if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore - if isinstance(inpt_val, LazyField): - new_val = inpt_val.split(splitter) - elif isinstance(inpt_val, ty.Iterable) and not isinstance( - inpt_val, (ty.Mapping, str) - ): - new_val = StateArray(inpt_val) - else: - raise TypeError( - f"Could not split {inpt_val} as it is not a sequence type" - ) - else: - new_val = inpt_val - new_inputs[inpt_name] = new_val - self.inputs = attr.evolve(self.inputs, **new_inputs) - if not self.state or splitter != self.state.splitter: - self.set_state(splitter) - return self - - def combine( - self, - combiner: ty.Union[ty.List[str], str], - overwrite: bool = False, # **kwargs - ): - """ - Combine inputs parameterized by one or more previous tasks. 
- - Parameters - ---------- - combiner : list[str] or str - the - overwrite : bool - whether to overwrite an existing combiner on the node - **kwargs : dict[str, Any] - values for the task that will be "combined" before they are provided to the - node - - Returns - ------- - self : TaskBase - a reference to the task - """ - if self._lzout: - raise RuntimeError( - f"Cannot combine {self} as its output interface has already been " - "accessed" - ) - if not isinstance(combiner, (str, list)): - raise Exception("combiner has to be a string or a list") - combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) - if ( - self.state - and self.state.combiner - and combiner != self.state.combiner - and not overwrite - ): - raise Exception( - "combiner has been already set, " - "if you want to overwrite it - use overwrite=True" - ) - if not self.state: - self.split(splitter=None) - # a task can have a combiner without a splitter - # if is connected to one with a splitter; - # self.fut_combiner will be used later as a combiner - self.fut_combiner = combiner - else: # self.state and not self.state.combiner - self.combiner = combiner - self.set_state(splitter=self.state.splitter, combiner=self.combiner) - return self - def _extract_input_el(self, inputs, inp_nm, ind): """ Extracting element of the inputs taking into account @@ -955,13 +819,11 @@ def _check_for_hash_changes(self): def _sanitize_spec( - spec: ty.Union[ - SpecInfo, ty.List[str], ty.Dict[str, ty.Type[ty.Any]], BaseSpec, None - ], + spec: ty.Union[ty.List[str], ty.Dict[str, ty.Type[ty.Any]], None], wf_name: str, spec_name: str, allow_empty: bool = False, -) -> SpecInfo: +): """Makes sure the provided input specifications are valid. If the input specification is a list of strings, this will @@ -1040,14 +902,12 @@ def __init__( cache_dir=None, cache_locations=None, input_spec: ty.Optional[ - ty.Union[ty.List[ty.Text], ty.Dict[ty.Text, ty.Type[ty.Any]], SpecInfo] + ty.Union[ty.List[ty.Text], ty.Dict[ty.Text, ty.Type[ty.Any]]] ] = None, cont_dim=None, messenger_args=None, messengers=None, - output_spec: ty.Optional[ - ty.Union[ty.List[str], ty.Dict[str, type], SpecInfo, BaseSpec] - ] = None, + output_spec: ty.Optional[ty.Union[ty.List[str], ty.Dict[str, type]]] = None, rerun=False, propagate_rerun=True, **kwargs, @@ -1338,91 +1198,91 @@ async def _run_task(self, submitter, rerun=False, environment=None): # at this point Workflow is stateless so this should be fine await submitter.expand_workflow(self, rerun=rerun) - def set_output( - self, - connections: ty.Union[ - ty.Tuple[str, LazyField], ty.List[ty.Tuple[str, LazyField]] - ], - ): - """ - Set outputs of the workflow by linking them with lazy outputs of tasks - - Parameters - ---------- - connections : tuple[str, LazyField] or list[tuple[str, LazyField]] or None - single or list of tuples linking the name of the output to a lazy output - of a task in the workflow. 
- """ - from pydra.utils.typing import TypeParser - - if self._connections is None: - self._connections = [] - if isinstance(connections, tuple) and len(connections) == 2: - new_connections = [connections] - elif isinstance(connections, list) and all( - [len(el) == 2 for el in connections] - ): - new_connections = connections - elif isinstance(connections, dict): - new_connections = list(connections.items()) - else: - raise TypeError( - "Connections can be a 2-elements tuple, a list of these tuples, or dictionary" - ) - # checking if a new output name is already in the connections - connection_names = [name for name, _ in self._connections] - if self.output_spec: - output_types = {a.name: a.type for a in attr.fields(self.interface.Outputs)} - else: - output_types = {} - # Check for type matches with explicitly defined outputs - conflicting = [] - type_mismatches = [] - for conn_name, lazy_field in new_connections: - if conn_name in connection_names: - conflicting.append(conn_name) - try: - output_type = output_types[conn_name] - except KeyError: - pass - else: - if not TypeParser.matches_type(lazy_field.type, output_type): - type_mismatches.append((conn_name, output_type, lazy_field.type)) - if conflicting: - raise ValueError(f"the output names {conflicting} are already set") - if type_mismatches: - raise TypeError( - f"the types of the following outputs of {self} don't match their declared types: " - + ", ".join( - f"{n} (expected: {ex}, provided: {p})" - for n, ex, p in type_mismatches - ) - ) - self._connections += new_connections - fields = [] - for con in self._connections: - wf_out_nm, lf = con - task_nm, task_out_nm = lf.name, lf.field - if task_out_nm == "all_": - help_string = f"all outputs from {task_nm}" - fields.append((wf_out_nm, dict, {"help_string": help_string})) - else: - from pydra.utils.typing import TypeParser - - # getting information about the output field from the task output_spec - # providing proper type and some help string - task_output_spec = getattr(self, task_nm).output_spec - out_fld = attr.fields_dict(task_output_spec)[task_out_nm] - help_string = ( - f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" - ) - if TypeParser.get_origin(lf.type) is StateArray: - type_ = TypeParser.get_item_type(lf.type) - else: - type_ = lf.type - fields.append((wf_out_nm, type_, {"help_string": help_string})) - self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,)) - logger.info("Added %s to %s", self.output_spec, self) + # def set_output( + # self, + # connections: ty.Union[ + # ty.Tuple[str, LazyField], ty.List[ty.Tuple[str, LazyField]] + # ], + # ): + # """ + # Set outputs of the workflow by linking them with lazy outputs of tasks + + # Parameters + # ---------- + # connections : tuple[str, LazyField] or list[tuple[str, LazyField]] or None + # single or list of tuples linking the name of the output to a lazy output + # of a task in the workflow. 
+ # """ + # from pydra.utils.typing import TypeParser + + # if self._connections is None: + # self._connections = [] + # if isinstance(connections, tuple) and len(connections) == 2: + # new_connections = [connections] + # elif isinstance(connections, list) and all( + # [len(el) == 2 for el in connections] + # ): + # new_connections = connections + # elif isinstance(connections, dict): + # new_connections = list(connections.items()) + # else: + # raise TypeError( + # "Connections can be a 2-elements tuple, a list of these tuples, or dictionary" + # ) + # # checking if a new output name is already in the connections + # connection_names = [name for name, _ in self._connections] + # if self.output_spec: + # output_types = {a.name: a.type for a in attr.fields(self.interface.Outputs)} + # else: + # output_types = {} + # # Check for type matches with explicitly defined outputs + # conflicting = [] + # type_mismatches = [] + # for conn_name, lazy_field in new_connections: + # if conn_name in connection_names: + # conflicting.append(conn_name) + # try: + # output_type = output_types[conn_name] + # except KeyError: + # pass + # else: + # if not TypeParser.matches_type(lazy_field.type, output_type): + # type_mismatches.append((conn_name, output_type, lazy_field.type)) + # if conflicting: + # raise ValueError(f"the output names {conflicting} are already set") + # if type_mismatches: + # raise TypeError( + # f"the types of the following outputs of {self} don't match their declared types: " + # + ", ".join( + # f"{n} (expected: {ex}, provided: {p})" + # for n, ex, p in type_mismatches + # ) + # ) + # self._connections += new_connections + # fields = [] + # for con in self._connections: + # wf_out_nm, lf = con + # task_nm, task_out_nm = lf.name, lf.field + # if task_out_nm == "all_": + # help_string = f"all outputs from {task_nm}" + # fields.append((wf_out_nm, dict, {"help_string": help_string})) + # else: + # from pydra.utils.typing import TypeParser + + # # getting information about the output field from the task output_spec + # # providing proper type and some help string + # task_output_spec = getattr(self, task_nm).output_spec + # out_fld = attr.fields_dict(task_output_spec)[task_out_nm] + # help_string = ( + # f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" + # ) + # if TypeParser.get_origin(lf.type) is StateArray: + # type_ = TypeParser.get_item_type(lf.type) + # else: + # type_ = lf.type + # fields.append((wf_out_nm, type_, {"help_string": help_string})) + # self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,)) + # logger.info("Added %s to %s", self.output_spec, self) def _collect_outputs(self): output_klass = self.interface.Outputs diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index f443a6fe69..92efc9de53 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -7,29 +7,47 @@ import sys from uuid import uuid4 import getpass +import typing as ty import subprocess as sp import re from time import strftime from traceback import format_exception -import attr +import attrs from filelock import SoftFileLock, Timeout import cloudpickle as cp -from .specs import ( - Runtime, - attr_fields, - Result, - LazyField, -) from .helpers_file import copy_nested_files from fileformats.core import FileSet +if ty.TYPE_CHECKING: + from .specs import TaskSpec + from pydra.design.base import Field + + +PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" + + +def attr_fields(spec, exclude_names=()): + return [field for field in spec.__attrs_attrs__ if 
field.name not in exclude_names] + + +def list_fields(interface: "TaskSpec") -> list["Field"]: + if not attrs.has(interface): + return [] + return [ + f.metadata[PYDRA_ATTR_METADATA] + for f in attrs.fields(interface) + if PYDRA_ATTR_METADATA in f.metadata + ] + # from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile def from_list_if_single(obj): """Converts a list to a single item if it is of length == 1""" - if obj is attr.NOTHING: + from pydra.engine.workflow.lazy import LazyField + + if obj is attrs.NOTHING: return obj if isinstance(obj, LazyField): return obj @@ -42,11 +60,11 @@ def from_list_if_single(obj): def print_help(obj): """Visit a task object and print its input/output interface.""" lines = [f"Help for {obj.__class__.__name__}"] - if attr.fields(obj.interface): + if attrs.fields(obj.interface): lines += ["Input Parameters:"] - for f in attr.fields(obj.interface): + for f in attrs.fields(obj.interface): default = "" - if f.default != attr.NOTHING and not f.name.startswith("_"): + if f.default != attrs.NOTHING and not f.name.startswith("_"): default = f" (default: {f.default})" try: name = f.type.__name__ @@ -54,9 +72,9 @@ def print_help(obj): name = str(f.type) lines += [f"- {f.name}: {name}{default}"] output_klass = obj.interface.Outputs - if attr.fields(output_klass): + if attrs.fields(output_klass): lines += ["Output Parameters:"] - for f in attr.fields(output_klass): + for f in attrs.fields(output_klass): try: name = f.type.__name__ except AttributeError: @@ -154,6 +172,8 @@ def gather_runtime_info(fname): A runtime object containing the collected information. """ + from .specs import Runtime + runtime = Runtime(rss_peak_gb=None, vms_peak_gb=None, cpu_peak_percent=None) # Read .prof file in and set runtime values @@ -370,9 +390,9 @@ def get_open_loop(): # TODO # """ -# current_output_spec_names = [f.name for f in attr.fields(interface.Outputs)] +# current_output_spec_names = [f.name for f in attrs.fields(interface.Outputs)] # new_fields = [] -# for fld in attr.fields(interface): +# for fld in attrs.fields(interface): # if "output_file_template" in fld.metadata: # if "output_field_name" in fld.metadata: # field_name = fld.metadata["output_field_name"] @@ -382,7 +402,7 @@ def get_open_loop(): # if field_name not in current_output_spec_names: # # TODO: should probably remove some of the keys # new_fields.append( -# (field_name, attr.ib(type=File, metadata=fld.metadata)) +# (field_name, attrs.field(type=File, metadata=fld.metadata)) # ) # output_spec.fields += new_fields # return output_spec @@ -423,6 +443,9 @@ def load_and_run( loading a task from a pickle file, settings proper input and running the task """ + + from .specs import Result + try: task = load_task(task_pkl=task_pkl, ind=ind) except Exception: @@ -470,7 +493,7 @@ def load_task(task_pkl, ind=None): task = cp.loads(task_pkl.read_bytes()) if ind is not None: ind_inputs = task.get_input_el(ind) - task.inputs = attr.evolve(task.inputs, **ind_inputs) + task.inputs = attrs.evolve(task.inputs, **ind_inputs) task._pre_split = True task.state = None # resetting uid for task @@ -540,7 +563,7 @@ async def __aexit__(self, exc_type, exc_value, traceback): return None -def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.any): +def parse_copyfile(fld: attrs.Attribute, default_collation=FileSet.CopyCollation.any): """Gets the copy mode from the 'copyfile' value from a field attribute""" copyfile = fld.metadata.get("copyfile", FileSet.CopyMode.any) if isinstance(copyfile, 
tuple): @@ -580,7 +603,7 @@ def parse_format_string(fmtstr): identifier = r"[a-zA-Z_]\w*" attribute = rf"\.{identifier}" item = r"\[\w+\]" - # Example: var.attr[key][0].attr2 (capture "var") + # Example: var.attrs[key][0].attr2 (capture "var") field_with_lookups = ( f"({identifier})(?:{attribute}|{item})*" # Capture only the keyword ) @@ -614,8 +637,10 @@ def ensure_list(obj, tuple2list=False): [5.0] """ - if obj is attr.NOTHING: - return attr.NOTHING + from pydra.engine.workflow.lazy import LazyField + + if obj is attrs.NOTHING: + return attrs.NOTHING if obj is None: return [] # list or numpy.array (this might need some extra flag in case an array has to be converted) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index f1430aad41..0cdc4f07f2 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -2,198 +2,166 @@ from pathlib import Path import typing as ty -import inspect -import re -import os -from copy import copy -from glob import glob -import attr -from typing_extensions import Self -from fileformats.core import FileSet -from fileformats.generic import ( - File, - Directory, -) -import pydra.engine -from .helpers_file import template_update_single -from pydra.utils.hash import hash_function, Cache - -# from pydra.utils.misc import add_exc_note - - -T = ty.TypeVar("T") - - -def attr_fields(spec, exclude_names=()): - return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] - - -# These are special types that are checked for in the construction of input/output specs -# and special converters inserted into the attrs fields. - - -class MultiInputObj(list, ty.Generic[T]): - pass - - -MultiInputFile = MultiInputObj[File] - -# Since we can't create a NewType from a type union, we add a dummy type to the union -# so we can detect the MultiOutput in the input/output spec creation -class MultiOutputType: - pass +# import inspect +# import re +# import os +from pydra.engine.audit import AuditFlag +# from glob import glob +import attrs +from typing_extensions import Self -MultiOutputObj = ty.Union[list, object, MultiOutputType] -MultiOutputFile = ty.Union[File, ty.List[File], MultiOutputType] - -OUTPUT_TEMPLATE_TYPES = ( - Path, - ty.List[Path], - ty.Union[Path, bool], - ty.Union[ty.List[Path], bool], - ty.List[ty.List[Path]], +# from fileformats.core import FileSet +from fileformats.generic import ( + File, + # Directory, ) +from .helpers import attr_fields +# from .helpers_file import template_update_single +# from pydra.utils.hash import hash_function, Cache -@attr.s(auto_attribs=True, kw_only=True) -class SpecInfo: - """Base data structure for metadata of specifications.""" - - name: str - """A name for the specification.""" - fields: ty.List[ty.Tuple] = attr.ib(factory=list) - """List of names of fields (can be inputs or outputs).""" - bases: ty.Sequence[ty.Type["BaseSpec"]] = attr.ib(factory=tuple) - """Keeps track of specification inheritance. 
- Should be a tuple containing at least one BaseSpec """ - - -@attr.s(auto_attribs=True, kw_only=True) -class BaseSpec: - """The base dataclass specs for all inputs and outputs.""" - - def collect_additional_outputs(self, inputs, output_dir, outputs): - """Get additional outputs.""" - return {} - - @property - def hash(self): - hsh, self._hashes = self._compute_hashes() - return hsh - - def hash_changes(self): - """Detects any changes in the hashed values between the current inputs and the - previously calculated values""" - _, new_hashes = self._compute_hashes() - return [k for k, v in new_hashes.items() if v != self._hashes[k]] - - def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: - """Compute a basic hash for any given set of fields.""" - inp_dict = {} - for field in attr_fields( - self, exclude_names=("_graph_checksums", "bindings", "files_hash") - ): - if field.metadata.get("output_file_template"): - continue - # removing values that are not set from hash calculation - if getattr(self, field.name) is attr.NOTHING: - continue - if "container_path" in field.metadata: - continue - inp_dict[field.name] = getattr(self, field.name) - hash_cache = Cache() - field_hashes = { - k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() - } - if hasattr(self, "_graph_checksums"): - field_hashes["_graph_checksums"] = self._graph_checksums - return hash_function(sorted(field_hashes.items())), field_hashes - - def retrieve_values(self, wf, state_index: ty.Optional[int] = None): - """Get values contained by this spec.""" - retrieved_values = {} - for field in attr_fields(self): - value = getattr(self, field.name) - if isinstance(value, LazyField): - retrieved_values[field.name] = value.get_value( - wf, state_index=state_index - ) - for field, val in retrieved_values.items(): - setattr(self, field, val) - - def check_fields_input_spec(self): - """ - Check fields from input spec based on the medatada. +# from pydra.utils.misc import add_exc_note - e.g., if xor, requires are fulfilled, if value provided when mandatory. - """ - fields = attr_fields(self) - - for field in fields: - field_is_mandatory = bool(field.metadata.get("mandatory")) - field_is_unset = getattr(self, field.name) is attr.NOTHING - - if field_is_unset and not field_is_mandatory: - continue - - # Collect alternative fields associated with this field. - alternative_fields = { - name: getattr(self, name) is not attr.NOTHING - for name in field.metadata.get("xor", []) - if name != field.name - } - alternatives_are_set = any(alternative_fields.values()) - - # Raise error if no field in mandatory alternative group is set. - if field_is_unset: - if alternatives_are_set: - continue - message = f"{field.name} is mandatory and unset." - if alternative_fields: - raise AttributeError( - message[:-1] - + f", but no alternative provided by {list(alternative_fields)}." - ) - else: - raise AttributeError(message) - - # Raise error if multiple alternatives are set. - elif alternatives_are_set: - set_alternative_fields = [ - name for name, is_set in alternative_fields.items() if is_set - ] - raise AttributeError( - f"{field.name} is mutually exclusive with {set_alternative_fields}" - ) - - # Collect required fields associated with this field. - required_fields = { - name: getattr(self, name) is not attr.NOTHING - for name in field.metadata.get("requires", []) - if name != field.name - } - - # Raise error if any required field is unset. 
- if not all(required_fields.values()): - unset_required_fields = [ - name for name, is_set in required_fields.items() if not is_set - ] - raise AttributeError(f"{field.name} requires {unset_required_fields}") - - def check_metadata(self): - """Check contained metadata.""" - - def template_update(self): - """Update template.""" - - def copyfile_input(self, output_dir): - """Copy the file pointed by a :class:`File` input.""" - - -@attr.s(auto_attribs=True, kw_only=True) +# @attrs.define(auto_attribs=True, kw_only=True) +# class SpecInfo: +# """Base data structure for metadata of specifications.""" + +# name: str +# """A name for the specification.""" +# fields: ty.List[ty.Tuple] = attrs.field(factory=list) +# """List of names of fields (can be inputs or outputs).""" +# bases: ty.Sequence[ty.Type["BaseSpec"]] = attrs.field(factory=tuple) +# """Keeps track of specification inheritance. +# Should be a tuple containing at least one BaseSpec """ + + +# @attrs.define(auto_attribs=True, kw_only=True) +# class BaseSpec: +# """The base dataclass specs for all inputs and outputs.""" + +# def collect_additional_outputs(self, inputs, output_dir, outputs): +# """Get additional outputs.""" +# return {} + +# @property +# def hash(self): +# hsh, self._hashes = self._compute_hashes() +# return hsh + +# def hash_changes(self): +# """Detects any changes in the hashed values between the current inputs and the +# previously calculated values""" +# _, new_hashes = self._compute_hashes() +# return [k for k, v in new_hashes.items() if v != self._hashes[k]] + +# def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: +# """Compute a basic hash for any given set of fields.""" +# inp_dict = {} +# for field in attr_fields( +# self, exclude_names=("_graph_checksums", "bindings", "files_hash") +# ): +# if field.metadata.get("output_file_template"): +# continue +# # removing values that are not set from hash calculation +# if getattr(self, field.name) is attrs.NOTHING: +# continue +# if "container_path" in field.metadata: +# continue +# inp_dict[field.name] = getattr(self, field.name) +# hash_cache = Cache() +# field_hashes = { +# k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() +# } +# if hasattr(self, "_graph_checksums"): +# field_hashes["_graph_checksums"] = self._graph_checksums +# return hash_function(sorted(field_hashes.items())), field_hashes + +# def retrieve_values(self, wf, state_index: ty.Optional[int] = None): +# """Get values contained by this spec.""" +# retrieved_values = {} +# for field in attr_fields(self): +# value = getattr(self, field.name) +# if isinstance(value, LazyField): +# retrieved_values[field.name] = value.get_value( +# wf, state_index=state_index +# ) +# for field, val in retrieved_values.items(): +# setattr(self, field, val) + +# def check_fields_input_spec(self): +# """ +# Check fields from input spec based on the medatada. + +# e.g., if xor, requires are fulfilled, if value provided when mandatory. + +# """ +# fields = attr_fields(self) + +# for field in fields: +# field_is_mandatory = bool(field.metadata.get("mandatory")) +# field_is_unset = getattr(self, field.name) is attrs.NOTHING + +# if field_is_unset and not field_is_mandatory: +# continue + +# # Collect alternative fields associated with this field. 
+# alternative_fields = { +# name: getattr(self, name) is not attrs.NOTHING +# for name in field.metadata.get("xor", []) +# if name != field.name +# } +# alternatives_are_set = any(alternative_fields.values()) + +# # Raise error if no field in mandatory alternative group is set. +# if field_is_unset: +# if alternatives_are_set: +# continue +# message = f"{field.name} is mandatory and unset." +# if alternative_fields: +# raise AttributeError( +# message[:-1] +# + f", but no alternative provided by {list(alternative_fields)}." +# ) +# else: +# raise AttributeError(message) + +# # Raise error if multiple alternatives are set. +# elif alternatives_are_set: +# set_alternative_fields = [ +# name for name, is_set in alternative_fields.items() if is_set +# ] +# raise AttributeError( +# f"{field.name} is mutually exclusive with {set_alternative_fields}" +# ) + +# # Collect required fields associated with this field. +# required_fields = { +# name: getattr(self, name) is not attrs.NOTHING +# for name in field.metadata.get("requires", []) +# if name != field.name +# } + +# # Raise error if any required field is unset. +# if not all(required_fields.values()): +# unset_required_fields = [ +# name for name, is_set in required_fields.items() if not is_set +# ] +# raise AttributeError(f"{field.name} requires {unset_required_fields}") + +# def check_metadata(self): +# """Check contained metadata.""" + +# def template_update(self): +# """Update template.""" + +# def copyfile_input(self, output_dir): +# """Copy the file pointed by a :class:`File` input.""" + + +@attrs.define(auto_attribs=True, kw_only=True) class Runtime: """Represent run time metadata.""" @@ -205,7 +173,7 @@ class Runtime: """Peak in cpu consumption.""" -@attr.s(auto_attribs=True, kw_only=True) +@attrs.define(auto_attribs=True, kw_only=True) class Result: """Metadata regarding the outputs of processing.""" @@ -218,15 +186,15 @@ def __getstate__(self): if state["output"] is not None: fields = tuple((el.name, el.type) for el in attr_fields(state["output"])) state["output_spec"] = (state["output"].__class__.__name__, fields) - state["output"] = attr.asdict(state["output"], recurse=False) + state["output"] = attrs.asdict(state["output"], recurse=False) return state def __setstate__(self, state): if "output_spec" in state: spec = list(state["output_spec"]) del state["output_spec"] - klass = attr.make_class( - spec[0], {k: attr.ib(type=v) for k, v in list(spec[1])} + klass = attrs.make_class( + spec[0], {k: attrs.field(type=v) for k, v in list(spec[1])} ) state["output"] = klass(**state["output"]) self.__dict__.update(state) @@ -240,12 +208,12 @@ def get_output_field(self, field_name): Name of field in LazyField object """ if field_name == "all_": - return attr.asdict(self.output, recurse=False) + return attrs.asdict(self.output, recurse=False) else: return getattr(self.output, field_name) -@attr.s(auto_attribs=True, kw_only=True) +@attrs.define(auto_attribs=True, kw_only=True) class RuntimeSpec: """ Specification for a task. @@ -270,812 +238,679 @@ class RuntimeSpec: network: bool = False -@attr.s(auto_attribs=True, kw_only=True) -class FunctionSpec(BaseSpec): - """Specification for a process invoked from a shell.""" - - def check_metadata(self): - """ - Check the metadata for fields in input_spec and fields. - - Also sets the default values when available and needed. 
- - """ - supported_keys = { - "allowed_values", - "copyfile", - "help_string", - "mandatory", - # "readonly", #likely not needed - # "output_field_name", #likely not needed - # "output_file_template", #likely not needed - "requires", - "keep_extension", - "xor", - "sep", - } - for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): - mdata = fld.metadata - # checking keys from metadata - if set(mdata.keys()) - supported_keys: - raise AttributeError( - f"only these keys are supported {supported_keys}, but " - f"{set(mdata.keys()) - supported_keys} provided" - ) - # checking if the help string is provided (required field) - if "help_string" not in mdata: - raise AttributeError(f"{fld.name} doesn't have help_string field") - # not allowing for default if the field is mandatory - if not fld.default == attr.NOTHING and mdata.get("mandatory"): - raise AttributeError( - f"default value ({fld.default!r}) should not be set when the field " - f"('{fld.name}') in {self}) is mandatory" - ) - # setting default if value not provided and default is available - if getattr(self, fld.name) is None: - if not fld.default == attr.NOTHING: - setattr(self, fld.name, fld.default) - - -@attr.s(auto_attribs=True, kw_only=True) -class ShellSpec(BaseSpec): - """Specification for a process invoked from a shell.""" - - executable: ty.Union[str, ty.List[str]] = attr.ib( - metadata={ - "help_string": "the first part of the command, can be a string, " - "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" - } - ) - args: ty.Union[str, ty.List[str], None] = attr.ib( - None, - metadata={ - "help_string": "the last part of the command, can be a string, " - "e.g. , or a list" - }, - ) - - def retrieve_values(self, wf, state_index=None): - """Parse output results.""" - temp_values = {} - for field in attr_fields(self): - # retrieving values that do not have templates - if not field.metadata.get("output_file_template"): - value = getattr(self, field.name) - if isinstance(value, LazyField): - temp_values[field.name] = value.get_value( - wf, state_index=state_index - ) - for field, val in temp_values.items(): - value = path_to_string(value) - setattr(self, field, val) - - def check_metadata(self): - """ - Check the metadata for fields in input_spec and fields. - - Also sets the default values when available and needed. 
- - """ - from pydra.utils.typing import TypeParser - - supported_keys = { - "allowed_values", - "argstr", - "container_path", - "copyfile", - "help_string", - "mandatory", - "readonly", - "output_field_name", - "output_file_template", - "position", - "requires", - "keep_extension", - "xor", - "sep", - "formatter", - "_output_type", - } - - for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): - mdata = fld.metadata - # checking keys from metadata - if set(mdata.keys()) - supported_keys: - raise AttributeError( - f"only these keys are supported {supported_keys}, but " - f"{set(mdata.keys()) - supported_keys} provided for '{fld.name}' " - f"field in {self}" - ) - # checking if the help string is provided (required field) - if "help_string" not in mdata: - raise AttributeError( - f"{fld.name} doesn't have help_string field in {self}" - ) - # assuming that fields with output_file_template shouldn't have default - if mdata.get("output_file_template"): - if not any( - TypeParser.matches_type(fld.type, t) for t in OUTPUT_TEMPLATE_TYPES - ): - raise TypeError( - f"Type of '{fld.name}' should be one of {OUTPUT_TEMPLATE_TYPES} " - f"(not {fld.type}) because it has a value for output_file_template " - f"({mdata['output_file_template']!r})" - ) - if fld.default not in [attr.NOTHING, True, False]: - raise AttributeError( - f"default value ({fld.default!r}) should not be set together with " - f"output_file_template ({mdata['output_file_template']!r}) for " - f"'{fld.name}' field in {self}" - ) - # not allowing for default if the field is mandatory - if not fld.default == attr.NOTHING and mdata.get("mandatory"): - raise AttributeError( - f"default value ({fld.default!r}) should not be set when the field " - f"('{fld.name}') in {self}) is mandatory" - ) - # setting default if value not provided and default is available - if getattr(self, fld.name) is None: - if not fld.default == attr.NOTHING: - setattr(self, fld.name, fld.default) - - -@attr.s(auto_attribs=True, kw_only=True) -class ShellOutSpec: - """Output specification of a generic shell process.""" - - return_code: int - """The process' exit code.""" - stdout: str - """The process' standard output.""" - stderr: str - """The process' standard input.""" - - def collect_additional_outputs(self, inputs, output_dir, outputs): - from pydra.utils.typing import TypeParser - - """Collect additional outputs from shelltask output_spec.""" - additional_out = {} - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if not TypeParser.is_subclass( - fld.type, - ( - os.PathLike, - MultiOutputObj, - int, - float, - bool, - str, - list, - ), - ): - raise TypeError( - f"Support for {fld.type} type, required for '{fld.name}' in {self}, " - "has not been implemented in collect_additional_output" - ) - # assuming that field should have either default or metadata, but not both - input_value = getattr(inputs, fld.name, attr.NOTHING) - if fld.metadata and "callable" in fld.metadata: - fld_out = self._field_metadata(fld, inputs, output_dir, outputs) - elif fld.type in [int, float, bool, str, list]: - raise AttributeError(f"{fld.type} has to have a callable in metadata") - elif input_value: # Map input value through to output - fld_out = input_value - elif fld.default != attr.NOTHING: - fld_out = self._field_defaultvalue(fld, output_dir) - else: - raise AttributeError("File has to have default value or metadata") - if TypeParser.contains_type(FileSet, fld.type): - label = f"output field '{fld.name}' of {self}" - fld_out = 
TypeParser(fld.type, label=label).coerce(fld_out) - additional_out[fld.name] = fld_out - return additional_out - - def generated_output_names(self, inputs, output_dir): - """Returns a list of all outputs that will be generated by the task. - Takes into account the task input and the requires list for the output fields. - TODO: should be in all Output specs? - """ - # checking the input (if all mandatory fields are provided, etc.) - inputs.check_fields_input_spec() - output_names = ["return_code", "stdout", "stderr"] - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if fld.type not in [File, MultiOutputFile, Directory]: - raise Exception("not implemented (collect_additional_output)") - # assuming that field should have either default or metadata, but not both - if ( - fld.default in (None, attr.NOTHING) and not fld.metadata - ): # TODO: is it right? - raise AttributeError("File has to have default value or metadata") - elif fld.default != attr.NOTHING: - output_names.append(fld.name) - elif ( - fld.metadata - and self._field_metadata( - fld, inputs, output_dir, outputs=None, check_existance=False - ) - != attr.NOTHING - ): - output_names.append(fld.name) - return output_names - - def _field_defaultvalue(self, fld, output_dir): - """Collect output file if the default value specified.""" - if not isinstance(fld.default, (str, Path)): - raise AttributeError( - f"{fld.name} is a File, so default value " - f"should be a string or a Path, " - f"{fld.default} provided" - ) - default = fld.default - if isinstance(default, str): - default = Path(default) - - default = output_dir / default - if "*" not in str(default): - if default.exists(): - return default - else: - raise AttributeError(f"file {default} does not exist") - else: - all_files = [Path(el) for el in glob(str(default.expanduser()))] - if len(all_files) > 1: - return all_files - elif len(all_files) == 1: - return all_files[0] - else: - raise AttributeError(f"no file matches {default.name}") - - def _field_metadata( - self, fld, inputs, output_dir, outputs=None, check_existance=True - ): - """Collect output file if metadata specified.""" - if self._check_requires(fld, inputs) is False: - return attr.NOTHING - - if "value" in fld.metadata: - return output_dir / fld.metadata["value"] - # this block is only run if "output_file_template" is provided in output_spec - # if the field is set in input_spec with output_file_template, - # than the field already should have value - elif "output_file_template" in fld.metadata: - value = template_update_single( - fld, inputs=inputs, output_dir=output_dir, spec_type="output" - ) - - if fld.type is MultiOutputFile and type(value) is list: - # TODO: how to deal with mandatory list outputs - ret = [] - for val in value: - val = Path(val) - if check_existance and not val.exists(): - ret.append(attr.NOTHING) - else: - ret.append(val) - return ret - else: - val = Path(value) - # checking if the file exists - if check_existance and not val.exists(): - # if mandatory raise exception - if "mandatory" in fld.metadata: - if fld.metadata["mandatory"]: - raise Exception( - f"mandatory output for variable {fld.name} does not exist" - ) - return attr.NOTHING - return val - elif "callable" in fld.metadata: - callable_ = fld.metadata["callable"] - if isinstance(callable_, staticmethod): - # In case callable is defined as a static method, - # retrieve the function wrapped in the descriptor. 
- callable_ = callable_.__func__ - call_args = inspect.getfullargspec(callable_) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = fld - elif argnm == "output_dir": - call_args_val[argnm] = output_dir - elif argnm == "inputs": - call_args_val[argnm] = inputs - elif argnm == "stdout": - call_args_val[argnm] = outputs["stdout"] - elif argnm == "stderr": - call_args_val[argnm] = outputs["stderr"] - else: - try: - call_args_val[argnm] = getattr(inputs, argnm) - except AttributeError: - raise AttributeError( - f"arguments of the callable function from {fld.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - return callable_(**call_args_val) - else: - raise Exception( - f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld.metadata}.' - ) +# @attrs.define(auto_attribs=True, kw_only=True) +# class FunctionSpec(BaseSpec): +# """Specification for a process invoked from a shell.""" + +# def check_metadata(self): +# """ +# Check the metadata for fields in input_spec and fields. + +# Also sets the default values when available and needed. + +# """ +# supported_keys = { +# "allowed_values", +# "copyfile", +# "help_string", +# "mandatory", +# # "readonly", #likely not needed +# # "output_field_name", #likely not needed +# # "output_file_template", #likely not needed +# "requires", +# "keep_extension", +# "xor", +# "sep", +# } +# for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): +# mdata = fld.metadata +# # checking keys from metadata +# if set(mdata.keys()) - supported_keys: +# raise AttributeError( +# f"only these keys are supported {supported_keys}, but " +# f"{set(mdata.keys()) - supported_keys} provided" +# ) +# # checking if the help string is provided (required field) +# if "help_string" not in mdata: +# raise AttributeError(f"{fld.name} doesn't have help_string field") +# # not allowing for default if the field is mandatory +# if not fld.default == attrs.NOTHING and mdata.get("mandatory"): +# raise AttributeError( +# f"default value ({fld.default!r}) should not be set when the field " +# f"('{fld.name}') in {self}) is mandatory" +# ) +# # setting default if value not provided and default is available +# if getattr(self, fld.name) is None: +# if not fld.default == attrs.NOTHING: +# setattr(self, fld.name, fld.default) + + +# @attrs.define(auto_attribs=True, kw_only=True) +# class ShellSpec(BaseSpec): +# """Specification for a process invoked from a shell.""" + +# executable: ty.Union[str, ty.List[str]] = attrs.field( +# metadata={ +# "help_string": "the first part of the command, can be a string, " +# "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" +# } +# ) +# args: ty.Union[str, ty.List[str], None] = attrs.field( +# default=None, +# metadata={ +# "help_string": "the last part of the command, can be a string, " +# "e.g. 
, or a list" +# }, +# ) + +# def retrieve_values(self, wf, state_index=None): +# """Parse output results.""" +# temp_values = {} +# for field in attr_fields(self): +# # retrieving values that do not have templates +# if not field.metadata.get("output_file_template"): +# value = getattr(self, field.name) +# if isinstance(value, LazyField): +# temp_values[field.name] = value.get_value( +# wf, state_index=state_index +# ) +# for field, val in temp_values.items(): +# value = path_to_string(value) +# setattr(self, field, val) + +# def check_metadata(self): +# """ +# Check the metadata for fields in input_spec and fields. + +# Also sets the default values when available and needed. + +# """ +# from pydra.utils.typing import TypeParser + +# supported_keys = { +# "allowed_values", +# "argstr", +# "container_path", +# "copyfile", +# "help_string", +# "mandatory", +# "readonly", +# "output_field_name", +# "output_file_template", +# "position", +# "requires", +# "keep_extension", +# "xor", +# "sep", +# "formatter", +# "_output_type", +# } + +# for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): +# mdata = fld.metadata +# # checking keys from metadata +# if set(mdata.keys()) - supported_keys: +# raise AttributeError( +# f"only these keys are supported {supported_keys}, but " +# f"{set(mdata.keys()) - supported_keys} provided for '{fld.name}' " +# f"field in {self}" +# ) +# # checking if the help string is provided (required field) +# if "help_string" not in mdata: +# raise AttributeError( +# f"{fld.name} doesn't have help_string field in {self}" +# ) +# # assuming that fields with output_file_template shouldn't have default +# if mdata.get("output_file_template"): +# if not any( +# TypeParser.matches_type(fld.type, t) for t in OUTPUT_TEMPLATE_TYPES +# ): +# raise TypeError( +# f"Type of '{fld.name}' should be one of {OUTPUT_TEMPLATE_TYPES} " +# f"(not {fld.type}) because it has a value for output_file_template " +# f"({mdata['output_file_template']!r})" +# ) +# if fld.default not in [attrs.NOTHING, True, False]: +# raise AttributeError( +# f"default value ({fld.default!r}) should not be set together with " +# f"output_file_template ({mdata['output_file_template']!r}) for " +# f"'{fld.name}' field in {self}" +# ) +# # not allowing for default if the field is mandatory +# if not fld.default == attrs.NOTHING and mdata.get("mandatory"): +# raise AttributeError( +# f"default value ({fld.default!r}) should not be set when the field " +# f"('{fld.name}') in {self}) is mandatory" +# ) +# # setting default if value not provided and default is available +# if getattr(self, fld.name) is None: +# if not fld.default == attrs.NOTHING: +# setattr(self, fld.name, fld.default) + + +# @attrs.define(auto_attribs=True, kw_only=True) +# class ShellOutSpec: +# """Output specification of a generic shell process.""" + +# return_code: int +# """The process' exit code.""" +# stdout: str +# """The process' standard output.""" +# stderr: str +# """The process' standard input.""" + +# def collect_additional_outputs(self, inputs, output_dir, outputs): +# from pydra.utils.typing import TypeParser + +# """Collect additional outputs from shelltask output_spec.""" +# additional_out = {} +# for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): +# if not TypeParser.is_subclass( +# fld.type, +# ( +# os.PathLike, +# MultiOutputObj, +# int, +# float, +# bool, +# str, +# list, +# ), +# ): +# raise TypeError( +# f"Support for {fld.type} type, required for '{fld.name}' in {self}, " +# "has not 
been implemented in collect_additional_output" +# ) +# # assuming that field should have either default or metadata, but not both +# input_value = getattr(inputs, fld.name, attrs.NOTHING) +# if fld.metadata and "callable" in fld.metadata: +# fld_out = self._field_metadata(fld, inputs, output_dir, outputs) +# elif fld.type in [int, float, bool, str, list]: +# raise AttributeError(f"{fld.type} has to have a callable in metadata") +# elif input_value: # Map input value through to output +# fld_out = input_value +# elif fld.default != attrs.NOTHING: +# fld_out = self._field_defaultvalue(fld, output_dir) +# else: +# raise AttributeError("File has to have default value or metadata") +# if TypeParser.contains_type(FileSet, fld.type): +# label = f"output field '{fld.name}' of {self}" +# fld_out = TypeParser(fld.type, label=label).coerce(fld_out) +# additional_out[fld.name] = fld_out +# return additional_out + +# def generated_output_names(self, inputs, output_dir): +# """Returns a list of all outputs that will be generated by the task. +# Takes into account the task input and the requires list for the output fields. +# TODO: should be in all Output specs? +# """ +# # checking the input (if all mandatory fields are provided, etc.) +# inputs.check_fields_input_spec() +# output_names = ["return_code", "stdout", "stderr"] +# for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): +# if fld.type not in [File, MultiOutputFile, Directory]: +# raise Exception("not implemented (collect_additional_output)") +# # assuming that field should have either default or metadata, but not both +# if ( +# fld.default in (None, attrs.NOTHING) and not fld.metadata +# ): # TODO: is it right? +# raise AttributeError("File has to have default value or metadata") +# elif fld.default != attrs.NOTHING: +# output_names.append(fld.name) +# elif ( +# fld.metadata +# and self._field_metadata( +# fld, inputs, output_dir, outputs=None, check_existance=False +# ) +# != attrs.NOTHING +# ): +# output_names.append(fld.name) +# return output_names + +# def _field_defaultvalue(self, fld, output_dir): +# """Collect output file if the default value specified.""" +# if not isinstance(fld.default, (str, Path)): +# raise AttributeError( +# f"{fld.name} is a File, so default value " +# f"should be a string or a Path, " +# f"{fld.default} provided" +# ) +# default = fld.default +# if isinstance(default, str): +# default = Path(default) + +# default = output_dir / default +# if "*" not in str(default): +# if default.exists(): +# return default +# else: +# raise AttributeError(f"file {default} does not exist") +# else: +# all_files = [Path(el) for el in glob(str(default.expanduser()))] +# if len(all_files) > 1: +# return all_files +# elif len(all_files) == 1: +# return all_files[0] +# else: +# raise AttributeError(f"no file matches {default.name}") + +# def _field_metadata( +# self, fld, inputs, output_dir, outputs=None, check_existance=True +# ): +# """Collect output file if metadata specified.""" +# if self._check_requires(fld, inputs) is False: +# return attrs.NOTHING + +# if "value" in fld.metadata: +# return output_dir / fld.metadata["value"] +# # this block is only run if "output_file_template" is provided in output_spec +# # if the field is set in input_spec with output_file_template, +# # than the field already should have value +# elif "output_file_template" in fld.metadata: +# value = template_update_single( +# fld, inputs=inputs, output_dir=output_dir, spec_type="output" +# ) + +# if fld.type is MultiOutputFile 
and type(value) is list: +# # TODO: how to deal with mandatory list outputs +# ret = [] +# for val in value: +# val = Path(val) +# if check_existance and not val.exists(): +# ret.append(attrs.NOTHING) +# else: +# ret.append(val) +# return ret +# else: +# val = Path(value) +# # checking if the file exists +# if check_existance and not val.exists(): +# # if mandatory raise exception +# if "mandatory" in fld.metadata: +# if fld.metadata["mandatory"]: +# raise Exception( +# f"mandatory output for variable {fld.name} does not exist" +# ) +# return attrs.NOTHING +# return val +# elif "callable" in fld.metadata: +# callable_ = fld.metadata["callable"] +# if isinstance(callable_, staticmethod): +# # In case callable is defined as a static method, +# # retrieve the function wrapped in the descriptor. +# callable_ = callable_.__func__ +# call_args = inspect.getfullargspec(callable_) +# call_args_val = {} +# for argnm in call_args.args: +# if argnm == "field": +# call_args_val[argnm] = fld +# elif argnm == "output_dir": +# call_args_val[argnm] = output_dir +# elif argnm == "inputs": +# call_args_val[argnm] = inputs +# elif argnm == "stdout": +# call_args_val[argnm] = outputs["stdout"] +# elif argnm == "stderr": +# call_args_val[argnm] = outputs["stderr"] +# else: +# try: +# call_args_val[argnm] = getattr(inputs, argnm) +# except AttributeError: +# raise AttributeError( +# f"arguments of the callable function from {fld.name} " +# f"has to be in inputs or be field or output_dir, " +# f"but {argnm} is used" +# ) +# return callable_(**call_args_val) +# else: +# raise Exception( +# f"Metadata for '{fld.name}', does not not contain any of the required fields " +# f'("callable", "output_file_template" or "value"): {fld.metadata}.' +# ) + +# def _check_requires(self, fld, inputs): +# """checking if all fields from the requires and template are set in the input +# if requires is a list of list, checking if at least one list has all elements set +# """ +# from .helpers import ensure_list + +# if "requires" in fld.metadata: +# # if requires is a list of list it is treated as el[0] OR el[1] OR... 
+# required_fields = ensure_list(fld.metadata["requires"]) +# if all([isinstance(el, list) for el in required_fields]): +# field_required_OR = required_fields +# # if requires is a list of tuples/strings - I'm creating a 1-el nested list +# elif all([isinstance(el, (str, tuple)) for el in required_fields]): +# field_required_OR = [required_fields] +# else: +# raise Exception( +# f"requires field can be a list of list, or a list " +# f"of strings/tuples, but {fld.metadata['requires']} " +# f"provided for {fld.name}" +# ) +# else: +# field_required_OR = [[]] + +# for field_required in field_required_OR: +# # if the output has output_file_template field, +# # adding all input fields from the template to requires +# if "output_file_template" in fld.metadata: +# template = fld.metadata["output_file_template"] +# # if a template is a function it has to be run first with the inputs as the only arg +# if callable(template): +# template = template(inputs) +# inp_fields = re.findall(r"{\w+}", template) +# field_required += [ +# el[1:-1] for el in inp_fields if el[1:-1] not in field_required +# ] + +# # it's a flag, of the field from the list is not in input it will be changed to False +# required_found = True +# for field_required in field_required_OR: +# required_found = True +# # checking if the input fields from requires have set values +# for inp in field_required: +# if isinstance(inp, str): # name of the input field +# if not hasattr(inputs, inp): +# raise Exception( +# f"{inp} is not a valid input field, can't be used in requires" +# ) +# elif getattr(inputs, inp) in [attrs.NOTHING, None]: +# required_found = False +# break +# elif isinstance(inp, tuple): # (name, allowed values) +# inp, allowed_val = inp[0], ensure_list(inp[1]) +# if not hasattr(inputs, inp): +# raise Exception( +# f"{inp} is not a valid input field, can't be used in requires" +# ) +# elif getattr(inputs, inp) not in allowed_val: +# required_found = False +# break +# else: +# raise Exception( +# f"each element of the requires element should be a string or a tuple, " +# f"but {inp} is found in {field_required}" +# ) +# # if the specific list from field_required_OR has all elements set, no need to check more +# if required_found: +# break + +# if required_found: +# return True +# else: +# return False + + +# @attrs.define +# class LazyInterface: +# _task: "core.Task" = attrs.field() +# _attr_type: str + +# def __getattr__(self, name): +# if name in ("_task", "_attr_type", "_field_names"): +# raise AttributeError(f"{name} hasn't been set yet") +# if name not in self._field_names: +# raise AttributeError( +# f"Task '{self._task.name}' has no {self._attr_type} attribute '{name}', " +# "available: '" + "', '".join(self._field_names) + "'" +# ) +# type_ = self._get_type(name) +# splits = self._get_task_splits() +# combines = self._get_task_combines() +# if combines and self._attr_type == "output": +# # Add in any scalar splits referencing upstream splits, i.e. 
"_myupstreamtask", +# # "_myarbitrarytask" +# combined_upstreams = set() +# if self._task.state: +# for scalar in LazyField.normalize_splitter( +# self._task.state.splitter, strip_previous=False +# ): +# for field in scalar: +# if field.startswith("_"): +# node_name = field[1:] +# if any(c.split(".")[0] == node_name for c in combines): +# combines.update( +# f for f in scalar if not f.startswith("_") +# ) +# combined_upstreams.update( +# f[1:] for f in scalar if f.startswith("_") +# ) +# if combines: +# # Wrap type in list which holds the combined items +# type_ = ty.List[type_] +# # Iterate through splits to remove any splits which are removed by the +# # combiner +# for splitter in copy(splits): +# remaining = tuple( +# s +# for s in splitter +# if not any( +# (x in combines or x.split(".")[0] in combined_upstreams) +# for x in s +# ) +# ) +# if remaining != splitter: +# splits.remove(splitter) +# if remaining: +# splits.add(remaining) +# # Wrap the type in a nested StateArray type +# if splits: +# type_ = StateArray[type_] +# lf_klass = LazyInField if self._attr_type == "input" else LazyOutField +# return lf_klass[type_]( +# name=self._task.name, +# field=name, +# type=type_, +# splits=splits, +# ) + +# def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: +# """Returns the states over which the inputs of the task are split""" +# splitter = self._task.state.splitter if self._task.state else None +# splits = set() +# if splitter: +# # Ensure that splits is of tuple[tuple[str, ...], ...] form +# splitter = LazyField.normalize_splitter(splitter) +# if splitter: +# splits.add(splitter) +# for inpt in attrs.asdict(self._task.inputs, recurse=False).values(): +# if isinstance(inpt, LazyField): +# splits.update(inpt.splits) +# return splits + +# def _get_task_combines(self) -> ty.Set[ty.Union[str, ty.Tuple[str, ...]]]: +# """Returns the states over which the outputs of the task are combined""" +# combiner = ( +# self._task.state.combiner +# if self._task.state is not None +# else getattr(self._task, "fut_combiner", None) +# ) +# return set(combiner) if combiner else set() + + +# class LazyIn(LazyInterface): +# _attr_type = "input" + +# def _get_type(self, name): +# attr = next(t for n, t in self._task.input_spec.fields if n == name) +# if attr is None: +# return ty.Any +# elif inspect.isclass(attr): +# return attr +# else: +# return attr.type + +# @property +# def _field_names(self): +# return [field[0] for field in self._task.input_spec.fields] + + +# class LazyOut(LazyInterface): +# _attr_type = "output" + +# def _get_type(self, name): +# try: +# type_ = next(f[1] for f in self._task.output_spec.fields if f[0] == name) +# except StopIteration: +# type_ = ty.Any +# else: +# if not inspect.isclass(type_): +# try: +# type_ = type_.type # attrs _CountingAttribute +# except AttributeError: +# pass # typing._SpecialForm +# return type_ + +# @property +# def _field_names(self): +# return self._task.output_names + ["all_"] - def _check_requires(self, fld, inputs): - """checking if all fields from the requires and template are set in the input - if requires is a list of list, checking if at least one list has all elements set - """ - from .helpers import ensure_list - - if "requires" in fld.metadata: - # if requires is a list of list it is treated as el[0] OR el[1] OR... 
- required_fields = ensure_list(fld.metadata["requires"]) - if all([isinstance(el, list) for el in required_fields]): - field_required_OR = required_fields - # if requires is a list of tuples/strings - I'm creating a 1-el nested list - elif all([isinstance(el, (str, tuple)) for el in required_fields]): - field_required_OR = [required_fields] - else: - raise Exception( - f"requires field can be a list of list, or a list " - f"of strings/tuples, but {fld.metadata['requires']} " - f"provided for {fld.name}" - ) - else: - field_required_OR = [[]] - - for field_required in field_required_OR: - # if the output has output_file_template field, - # adding all input fields from the template to requires - if "output_file_template" in fld.metadata: - template = fld.metadata["output_file_template"] - # if a template is a function it has to be run first with the inputs as the only arg - if callable(template): - template = template(inputs) - inp_fields = re.findall(r"{\w+}", template) - field_required += [ - el[1:-1] for el in inp_fields if el[1:-1] not in field_required - ] - - # it's a flag, of the field from the list is not in input it will be changed to False - required_found = True - for field_required in field_required_OR: - required_found = True - # checking if the input fields from requires have set values - for inp in field_required: - if isinstance(inp, str): # name of the input field - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" - ) - elif getattr(inputs, inp) in [attr.NOTHING, None]: - required_found = False - break - elif isinstance(inp, tuple): # (name, allowed values) - inp, allowed_val = inp[0], ensure_list(inp[1]) - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" - ) - elif getattr(inputs, inp) not in allowed_val: - required_found = False - break - else: - raise Exception( - f"each element of the requires element should be a string or a tuple, " - f"but {inp} is found in {field_required}" - ) - # if the specific list from field_required_OR has all elements set, no need to check more - if required_found: - break - - if required_found: - return True - else: - return False +def donothing(*args, **kwargs): + return None -@attr.s -class LazyInterface: - _task: "core.Task" = attr.ib() - _attr_type: str - def __getattr__(self, name): - if name in ("_task", "_attr_type", "_field_names"): - raise AttributeError(f"{name} hasn't been set yet") - if name not in self._field_names: - raise AttributeError( - f"Task '{self._task.name}' has no {self._attr_type} attribute '{name}', " - "available: '" + "', '".join(self._field_names) + "'" - ) - type_ = self._get_type(name) - splits = self._get_task_splits() - combines = self._get_task_combines() - if combines and self._attr_type == "output": - # Add in any scalar splits referencing upstream splits, i.e. 
"_myupstreamtask", - # "_myarbitrarytask" - combined_upstreams = set() - if self._task.state: - for scalar in LazyField.normalize_splitter( - self._task.state.splitter, strip_previous=False - ): - for field in scalar: - if field.startswith("_"): - node_name = field[1:] - if any(c.split(".")[0] == node_name for c in combines): - combines.update( - f for f in scalar if not f.startswith("_") - ) - combined_upstreams.update( - f[1:] for f in scalar if f.startswith("_") - ) - if combines: - # Wrap type in list which holds the combined items - type_ = ty.List[type_] - # Iterate through splits to remove any splits which are removed by the - # combiner - for splitter in copy(splits): - remaining = tuple( - s - for s in splitter - if not any( - (x in combines or x.split(".")[0] in combined_upstreams) - for x in s - ) - ) - if remaining != splitter: - splits.remove(splitter) - if remaining: - splits.add(remaining) - # Wrap the type in a nested StateArray type - if splits: - type_ = StateArray[type_] - lf_klass = LazyInField if self._attr_type == "input" else LazyOutField - return lf_klass[type_]( - name=self._task.name, - field=name, - type=type_, - splits=splits, - ) +@attrs.define(auto_attribs=True, kw_only=True) +class TaskHook: + """Callable task hooks.""" - def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: - """Returns the states over which the inputs of the task are split""" - splitter = self._task.state.splitter if self._task.state else None - splits = set() - if splitter: - # Ensure that splits is of tuple[tuple[str, ...], ...] form - splitter = LazyField.normalize_splitter(splitter) - if splitter: - splits.add(splitter) - for inpt in attr.asdict(self._task.inputs, recurse=False).values(): - if isinstance(inpt, LazyField): - splits.update(inpt.splits) - return splits - - def _get_task_combines(self) -> ty.Set[ty.Union[str, ty.Tuple[str, ...]]]: - """Returns the states over which the outputs of the task are combined""" - combiner = ( - self._task.state.combiner - if self._task.state is not None - else getattr(self._task, "fut_combiner", None) - ) - return set(combiner) if combiner else set() + pre_run_task: ty.Callable = donothing + post_run_task: ty.Callable = donothing + pre_run: ty.Callable = donothing + post_run: ty.Callable = donothing + def __setattr__(self, attr, val): + if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + raise AttributeError("Cannot set unknown hook") + super().__setattr__(attr, val) -class LazyIn(LazyInterface): - _attr_type = "input" + def reset(self): + for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + setattr(self, val, donothing) - def _get_type(self, name): - attr = next(t for n, t in self._task.input_spec.fields if n == name) - if attr is None: - return ty.Any - elif inspect.isclass(attr): - return attr - else: - return attr.type - @property - def _field_names(self): - return [field[0] for field in self._task.input_spec.fields] +def path_to_string(value): + """Convert paths to strings.""" + if isinstance(value, Path): + value = str(value) + elif isinstance(value, list) and len(value) and isinstance(value[0], Path): + value = [str(val) for val in value] + return value -class LazyOut(LazyInterface): - _attr_type = "output" +class OutputsSpec: + """Base class for all output specifications""" - def _get_type(self, name): - try: - type_ = next(f[1] for f in self._task.output_spec.fields if f[0] == name) - except StopIteration: - type_ = ty.Any - else: - if not inspect.isclass(type_): - try: - type_ = 
type_.type # attrs _CountingAttribute - except AttributeError: - pass # typing._SpecialForm - return type_ - - @property - def _field_names(self): - return self._task.output_names + ["all_"] - - -TypeOrAny = ty.Union[ty.Type[T], ty.Any] -Splitter = ty.Union[str, ty.Tuple[str, ...]] - - -@attr.s(auto_attribs=True, kw_only=True) -class LazyField(ty.Generic[T]): - """Lazy fields implement promises.""" - - name: str - field: str - type: TypeOrAny - # Set of splitters that have been applied to the lazy field. Note that the splitter - # specifications are transformed to a tuple[tuple[str, ...], ...] form where the - # outer tuple is the outer product, the inner tuple are inner products (where either - # product can be of length==1) - splits: ty.FrozenSet[ty.Tuple[ty.Tuple[str, ...], ...]] = attr.field( - factory=frozenset, converter=frozenset - ) - cast_from: ty.Optional[ty.Type[ty.Any]] = None - # type_checked will be set to False after it is created but defaults to True here for - # ease of testing - type_checked: bool = True - - def __bytes_repr__(self, cache): - yield type(self).__name__.encode() - yield self.name.encode() - yield self.field.encode() - - def cast(self, new_type: TypeOrAny) -> Self: - """ "casts" the lazy field to a new type + def split( + self, + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, + /, + overwrite: bool = False, + cont_dim: ty.Optional[dict] = None, + **inputs, + ) -> Self: + """ + Run this task parametrically over lists of split inputs. Parameters ---------- - new_type : type - the type to cast the lazy-field to + splitter : str or list[str] or tuple[str] or None + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + overwrite : bool, optional + whether to overwrite an existing split on the node, by default False + cont_dim : dict, optional + Container dimensions for specific inputs, used in the splitter. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. + **inputs + fields to split over, will automatically be wrapped in a StateArray object + and passed to the node inputs Returns ------- - cast_field : LazyField - a copy of the lazy field with the new type + self : TaskBase + a reference to the task """ - return type(self)[new_type]( - name=self.name, - field=self.field, - type=new_type, - splits=self.splits, - cast_from=self.cast_from if self.cast_from else self.type, - ) - - def split(self, splitter: Splitter) -> Self: - """ "Splits" the lazy field over an array of nodes by replacing the sequence type - of the lazy field with StateArray to signify that it will be "split" across - - Parameters - ---------- - splitter : str or ty.Tuple[str, ...] 
or ty.List[str] - the splitter to append to the list of splitters + self._node.split(splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs) + return self + + def combine( + self, + combiner: ty.Union[ty.List[str], str], + overwrite: bool = False, # **kwargs + ) -> Self: """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel - - splits = self.splits | set([LazyField.normalize_splitter(splitter)]) - # Check to see whether the field has already been split over the given splitter - if splits == self.splits: - return self - - # Modify the type of the lazy field to include the split across a state-array - inner_type, prev_split_depth = TypeParser.strip_splits(self.type) - assert prev_split_depth <= 1 - if inner_type is ty.Any: - type_ = StateArray[ty.Any] - elif TypeParser.matches_type(inner_type, list): - item_type = TypeParser.get_item_type(inner_type) - type_ = StateArray[item_type] - else: - raise TypeError( - f"Cannot split non-sequence field {self} of type {inner_type}" - ) - if prev_split_depth: - type_ = StateArray[type_] - return type(self)[type_]( - name=self.name, - field=self.field, - type=type_, - splits=splits, - ) - - # def combine(self, combiner: str | list[str]) -> Self: - - @classmethod - def normalize_splitter( - cls, splitter: Splitter, strip_previous: bool = True - ) -> ty.Tuple[ty.Tuple[str, ...], ...]: - """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] form - used in LazyFields""" - if isinstance(splitter, str): - splitter = (splitter,) - if isinstance(splitter, tuple): - splitter = (splitter,) # type: ignore - else: - assert isinstance(splitter, list) - # convert to frozenset to differentiate from tuple, yet still be hashable - # (NB: order of fields in list splitters aren't relevant) - splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) - # Strip out fields starting with "_" designating splits in upstream nodes - if strip_previous: - stripped = tuple( - tuple(f for f in i if not f.startswith("_")) for i in splitter - ) - splitter = tuple(s for s in stripped if s) # type: ignore - return splitter # type: ignore - - def _apply_cast(self, value): - """\"Casts\" the value from the retrieved type if a cast has been applied to - the lazy-field""" - from pydra.utils.typing import TypeParser - - if self.cast_from: - assert TypeParser.matches(value, self.cast_from) - value = self.type(value) - return value - - -@attr.s(auto_attribs=True, kw_only=True) -class LazyInField(LazyField[T]): - - name: str = None - attr_type = "input" - - def get_value( - self, wf: "pydra.engine.workflow.Workflow", state_index: ty.Optional[int] = None - ) -> ty.Any: - """Return the value of a lazy field. + Combine inputs parameterized by one or more previous tasks. Parameters ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional - the state index of the field to access + combiner : list[str] or str + the field or list of inputs to be combined (i.e. 
not left split) after the + task has been run + overwrite : bool + whether to overwrite an existing combiner on the node + **kwargs : dict[str, Any] + values for the task that will be "combined" before they are provided to the + node Returns ------- - value : Any - the resolved value of the lazy-field + self : Self + a reference to the outputs object """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel - - value = getattr(wf.inputs, self.field) - if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: - _, split_depth = TypeParser.strip_splits(self.type) - - def apply_splits(obj, depth): - if depth < 1: - return obj - return StateArray[self.type](apply_splits(i, depth - 1) for i in obj) + self._node.combine(combiner, overwrite=overwrite) + return self - value = apply_splits(value, split_depth) - value = self._apply_cast(value) - return value +OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) -class LazyOutField(LazyField[T]): - attr_type = "output" - def get_value( - self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None - ) -> ty.Any: - """Return the value of a lazy field. +class TaskSpec(ty.Generic[OutputType]): + """Base class for all task specifications""" - Parameters - ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional - the state index of the field to access + Task: "ty.Type[core.Task]" - Returns - ------- - value : Any - the resolved value of the lazy-field - """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel - - node = getattr(wf, self.name) - result = node.result(state_index=state_index) - if result is None: - raise RuntimeError( - f"Could not find results of '{node.name}' node in a sub-directory " - f"named '{node.checksum}' in any of the cache locations.\n" - + "\n".join(str(p) for p in set(node.cache_locations)) - + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. " - f"Current values and hashes: {node.inputs}, " - f"{node.inputs._hashes}\n\n" - "Set loglevel to 'debug' in order to track hash changes " - "throughout the execution of the workflow.\n\n " - "These issues may have been caused by `bytes_repr()` methods " - "that don't return stable hash values for specific object " - "types across multiple processes (see bytes_repr() " - '"singledispatch "function in pydra/utils/hash.py).' - "You may need to write specific `bytes_repr()` " - "implementations (see `pydra.utils.hash.register_serializer`) or a " - "`__bytes_repr__()` dunder methods to handle one or more types in " - "your interface inputs." 
+ def __call__( + self, + name: str | None = None, + audit_flags: AuditFlag = AuditFlag.NONE, + cache_dir=None, + cache_locations=None, + inputs: ty.Text | File | dict[str, ty.Any] | None = None, + cont_dim=None, + messenger_args=None, + messengers=None, + rerun=False, + **kwargs, + ): + self._check_for_unset_values() + task = self.Task( + self, + name=name, + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + inputs=inputs, + cont_dim=cont_dim, + messenger_args=messenger_args, + messengers=messengers, + rerun=rerun, + ) + return task(**kwargs) + + def _check_for_unset_values(self): + if unset := [ + k + for k, v in attrs.asdict(self, recurse=False).items() + if v is attrs.NOTHING + ]: + raise ValueError( + f"The following values {unset} in the {self!r} interface need to be set " + "before the workflow can be constructed" ) - _, split_depth = TypeParser.strip_splits(self.type) - - def get_nested_results(res, depth: int): - if isinstance(res, list): - if not depth: - val = [r.get_output_field(self.field) for r in res] - else: - val = StateArray[self.type]( - get_nested_results(res=r, depth=depth - 1) for r in res - ) - else: - if res.errored: - raise ValueError( - f"Cannot retrieve value for {self.field} from {self.name} as " - "the node errored" - ) - val = res.get_output_field(self.field) - if depth and not wf._pre_split: - assert isinstance(val, ty.Sequence) and not isinstance(val, str) - val = StateArray[self.type](val) - return val - - value = get_nested_results(result, depth=split_depth) - value = self._apply_cast(value) - return value - - -class StateArray(ty.List[T]): - """an array of values from, or to be split over in an array of nodes (see TaskBase.split()), - multiple nodes of the same task. Used in type-checking to differentiate between list - types and values for multiple nodes - """ - - def __repr__(self): - return f"{type(self).__name__}(" + ", ".join(repr(i) for i in self) + ")" - - -def donothing(*args, **kwargs): - return None - - -@attr.s(auto_attribs=True, kw_only=True) -class TaskHook: - """Callable task hooks.""" - - pre_run_task: ty.Callable = donothing - post_run_task: ty.Callable = donothing - pre_run: ty.Callable = donothing - post_run: ty.Callable = donothing - - def __setattr__(self, attr, val): - if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - raise AttributeError("Cannot set unknown hook") - super().__setattr__(attr, val) - - def reset(self): - for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - setattr(self, val, donothing) - - -def path_to_string(value): - """Convert paths to strings.""" - if isinstance(value, Path): - value = str(value) - elif isinstance(value, list) and len(value) and isinstance(value[0], Path): - value = [str(val) for val in value] - return value -from . import core # noqa +from pydra.engine import core # noqa: E402 diff --git a/pydra/engine/state.py b/pydra/engine/state.py index befbf86b9d..ffaddf3f3f 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -3,10 +3,11 @@ from copy import deepcopy import itertools from functools import reduce - +import attrs from . 
import helpers_state as hlpst from .helpers import ensure_list -from .specs import BaseSpec + +# from .specs import BaseSpec # TODO: move to State op = {".": zip, "*": itertools.product} @@ -763,8 +764,8 @@ def prepare_states(self, inputs, cont_dim=None): self.cont_dim = cont_dim else: self.cont_dim = {} - if isinstance(inputs, BaseSpec): - self.inputs = hlpst.inputs_types_to_dict(self.name, inputs) + if attrs.has(inputs): + self.inputs = attrs.asdict(inputs, recurse=False) else: self.inputs = inputs if self.other_states: diff --git a/pydra/engine/task.py b/pydra/engine/task.py index b9317602bd..68731f47bc 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -55,10 +55,11 @@ from .core import Task, is_lazy from pydra.utils.messenger import AuditFlag from .specs import ( - BaseSpec, - SpecInfo, + # BaseSpec, + # SpecInfo, # ShellSpec, # ShellOutSpec, + TaskSpec, attr_fields, ) from .helpers import ( @@ -78,16 +79,14 @@ class FunctionTask(Task): def __init__( self, - func: ty.Callable, + spec: TaskSpec, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cache_locations=None, - input_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, cont_dim=None, messenger_args=None, messengers=None, name=None, - output_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, rerun=False, **kwargs, ): @@ -226,14 +225,13 @@ class ShellCommandTask(Task): def __init__( self, + spec: TaskSpec, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, - input_spec: ty.Optional[SpecInfo] = None, cont_dim=None, messenger_args=None, messengers=None, name=None, - output_spec: ty.Optional[SpecInfo] = None, rerun=False, strip=False, environment=Native(), diff --git a/pydra/engine/workflow/__init__.py b/pydra/engine/workflow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py new file mode 100644 index 0000000000..5026109111 --- /dev/null +++ b/pydra/engine/workflow/base.py @@ -0,0 +1,178 @@ +import typing as ty +from copy import copy +from operator import itemgetter +from typing_extensions import Self +import attrs +from pydra.engine.helpers import list_fields +from pydra.engine.specs import TaskSpec, OutputsSpec +from .lazy import LazyInField +from pydra.utils.hash import hash_function +from pydra.utils.typing import TypeParser, StateArray +from .node import Node + + +OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) + + +@attrs.define(auto_attribs=False) +class Workflow(ty.Generic[OutputType]): + """A workflow, constructed from a workflow specification + + Parameters + ---------- + name : str + The name of the workflow + inputs : TaskSpec + The input specification of the workflow + outputs : TaskSpec + The output specification of the workflow + """ + + name: str = attrs.field() + inputs: TaskSpec[OutputType] = attrs.field() + outputs: OutputType = attrs.field() + _nodes: dict[str, Node] = attrs.field(factory=dict) + + @classmethod + def construct( + cls, + spec: TaskSpec[OutputType], + ) -> Self: + """Construct a workflow from a specification, caching the constructed worklow""" + + lazy_inputs = [f for f in list_fields(type(spec)) if f.lazy] + + # Create a cache key by hashing all the non-lazy input values in the spec + # and use this to store the constructed workflow in case it is reused or nested + # and split over within another workflow + lazy_input_names = {f.name for f in lazy_inputs} + non_lazy_vals = tuple( + sorted( + ( + i + for i in attrs.asdict(spec, recurse=False).items() + if i[0] not in 
lazy_input_names + ), + key=itemgetter(0), + ) + ) + hash_key = hash_function(non_lazy_vals) + if hash_key in cls._constructed: + return cls._constructed[hash_key] + + # Initialise the outputs of the workflow + outputs = spec.Outputs( + **{f.name: attrs.NOTHING for f in attrs.fields(spec.Outputs)} + ) + + # Initialise the lzin fields + lazy_spec = copy(spec) + wf = cls.under_construction = Workflow( + name=type(spec).__name__, + inputs=lazy_spec, + outputs=outputs, + ) + for lzy_inpt in lazy_inputs: + setattr( + lazy_spec, + lzy_inpt.name, + LazyInField( + node=wf, + field=lzy_inpt.name, + type=lzy_inpt.type, + ), + ) + + input_values = attrs.asdict(lazy_spec, recurse=False) + constructor = input_values.pop("constructor") + cls._under_construction = wf + try: + # Call the user defined constructor to set the outputs + output_lazy_fields = constructor(**input_values) + # Check to see whether any mandatory inputs are not set + for node in wf.nodes: + node.inputs._check_for_unset_values() + # Check that the outputs are set correctly, either directly by the constructor + # or via returned values that can be zipped with the output names + if output_lazy_fields: + if not isinstance(output_lazy_fields, (list, tuple)): + output_lazy_fields = [output_lazy_fields] + output_fields = list_fields(spec.Outputs) + if len(output_lazy_fields) != len(output_fields): + raise ValueError( + f"Expected {len(output_fields)} outputs, got " + f"{len(output_lazy_fields)} ({output_lazy_fields})" + ) + for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + if TypeParser.get_origin(outpt_lf.type) is StateArray: + # Automatically combine any uncombined state arrays into lists + tp, _ = TypeParser.strip_splits(outpt_lf.type) + outpt_lf.type = list[tp] + outpt_lf.splits = frozenset() + setattr(outputs, outpt.name, outpt_lf) + else: + if unset_outputs := [ + a + for a, v in attrs.asdict(outputs, recurse=False).items() + if v is attrs.NOTHING + ]: + raise ValueError( + f"Expected outputs {unset_outputs} to be set by the " + f"constructor of {wf!r}" + ) + finally: + cls._under_construction = None + + cls._constructed[hash_key] = wf + + return wf + + def add(self, task_spec: TaskSpec[OutputType], name=None) -> OutputType: + """Add a node to the workflow + + Parameters + ---------- + task_spec : TaskSpec + The specification of the task to add to the workflow as a node + name : str, optional + The name of the node, by default it will be the name of the task specification + class + + Returns + ------- + OutputType + The outputs specification of the node + """ + if name is None: + name = type(task_spec).__name__ + if name in self._nodes: + raise ValueError(f"Node with name {name!r} already exists in the workflow") + node = Node[OutputType](name=name, spec=task_spec, workflow=self) + self._nodes[name] = node + return node.lzout + + def __getitem__(self, key: str) -> Node: + return self._nodes[key] + + @property + def nodes(self) -> ty.Iterable[Node]: + return self._nodes.values() + + @property + def node_names(self) -> list[str]: + return list(self._nodes) + + @property + @classmethod + def under_construction(cls) -> "Workflow[ty.Any]": + if cls._under_construction is None: + raise ValueError( + "pydra.design.workflow.this() can only be called from within a workflow " + "constructor function (see 'pydra.design.workflow.define')" + ) + return cls._under_construction + + # Used to store the workflow that is currently being constructed + _under_construction: "Workflow[ty.Any]" = None + # Used to cache the constructed 
workflows by their hashed input values + _constructed: dict[int, "Workflow[ty.Any]"] = {} diff --git a/pydra/engine/workflow/lazy.py b/pydra/engine/workflow/lazy.py new file mode 100644 index 0000000000..f9d7bbddbb --- /dev/null +++ b/pydra/engine/workflow/lazy.py @@ -0,0 +1,250 @@ +import typing as ty +from typing_extensions import Self +import attrs +from pydra.utils.typing import StateArray +from . import node + +if ty.TYPE_CHECKING: + from .base import Workflow + + +T = ty.TypeVar("T") + +TypeOrAny = ty.Union[type, ty.Any] +Splitter = ty.Union[str, ty.Tuple[str, ...]] + + +@attrs.define(auto_attribs=True, kw_only=True) +class LazyField(ty.Generic[T]): + """Lazy fields implement promises.""" + + node: node.Node + field: str + type: TypeOrAny + # Set of splitters that have been applied to the lazy field. Note that the splitter + # specifications are transformed to a tuple[tuple[str, ...], ...] form where the + # outer tuple is the outer product, the inner tuple are inner products (where either + # product can be of length==1) + splits: ty.FrozenSet[ty.Tuple[ty.Tuple[str, ...], ...]] = attrs.field( + factory=frozenset, converter=frozenset + ) + cast_from: ty.Optional[ty.Type[ty.Any]] = None + # type_checked will be set to False after it is created but defaults to True here for + # ease of testing + type_checked: bool = True + + def __bytes_repr__(self, cache): + yield type(self).__name__.encode() + yield self.name.encode() + yield self.field.encode() + + def cast(self, new_type: TypeOrAny) -> Self: + """ "casts" the lazy field to a new type + + Parameters + ---------- + new_type : type + the type to cast the lazy-field to + + Returns + ------- + cast_field : LazyField + a copy of the lazy field with the new type + """ + return type(self)[new_type]( + name=self.name, + field=self.field, + type=new_type, + splits=self.splits, + cast_from=self.cast_from if self.cast_from else self.type, + ) + + # def split(self, splitter: Splitter) -> Self: + # """ "Splits" the lazy field over an array of nodes by replacing the sequence type + # of the lazy field with StateArray to signify that it will be "split" across + + # Parameters + # ---------- + # splitter : str or ty.Tuple[str, ...] or ty.List[str] + # the splitter to append to the list of splitters + # """ + # from pydra.utils.typing import ( + # TypeParser, + # ) # pylint: disable=import-outside-toplevel + + # splits = self.splits | set([LazyField.normalize_splitter(splitter)]) + # # Check to see whether the field has already been split over the given splitter + # if splits == self.splits: + # return self + + # # Modify the type of the lazy field to include the split across a state-array + # inner_type, prev_split_depth = TypeParser.strip_splits(self.type) + # assert prev_split_depth <= 1 + # if inner_type is ty.Any: + # type_ = StateArray[ty.Any] + # elif TypeParser.matches_type(inner_type, list): + # item_type = TypeParser.get_item_type(inner_type) + # type_ = StateArray[item_type] + # else: + # raise TypeError( + # f"Cannot split non-sequence field {self} of type {inner_type}" + # ) + # if prev_split_depth: + # type_ = StateArray[type_] + # return type(self)[type_]( + # name=self.name, + # field=self.field, + # type=type_, + # splits=splits, + # ) + + # # def combine(self, combiner: str | list[str]) -> Self: + + # @classmethod + # def normalize_splitter( + # cls, splitter: Splitter, strip_previous: bool = True + # ) -> ty.Tuple[ty.Tuple[str, ...], ...]: + # """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form + # used in LazyFields""" + # if isinstance(splitter, str): + # splitter = (splitter,) + # if isinstance(splitter, tuple): + # splitter = (splitter,) # type: ignore + # else: + # assert isinstance(splitter, list) + # # convert to frozenset to differentiate from tuple, yet still be hashable + # # (NB: order of fields in list splitters aren't relevant) + # splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) + # # Strip out fields starting with "_" designating splits in upstream nodes + # if strip_previous: + # stripped = tuple( + # tuple(f for f in i if not f.startswith("_")) for i in splitter + # ) + # splitter = tuple(s for s in stripped if s) # type: ignore + # return splitter # type: ignore + + def _apply_cast(self, value): + """\"Casts\" the value from the retrieved type if a cast has been applied to + the lazy-field""" + from pydra.utils.typing import TypeParser + + if self.cast_from: + assert TypeParser.matches(value, self.cast_from) + value = self.type(value) + return value + + +@attrs.define(auto_attribs=True, kw_only=True) +class LazyInField(LazyField[T]): + + attr_type = "input" + + def __eq__(self, other): + return ( + isinstance(other, LazyInField) + and self.field == other.field + and self.type == other.type + and self.splits == other.splits + ) + + def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + wf : Workflow + the workflow the lazy field references + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel + + value = getattr(wf.inputs, self.field) + if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: + _, split_depth = TypeParser.strip_splits(self.type) + + def apply_splits(obj, depth): + if depth < 1: + return obj + return StateArray[self.type](apply_splits(i, depth - 1) for i in obj) + + value = apply_splits(value, split_depth) + value = self._apply_cast(value) + return value + + +class LazyOutField(LazyField[T]): + attr_type = "output" + + def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + wf : Workflow + the workflow the lazy field references + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel + + node = getattr(wf, self.name) + result = node.result(state_index=state_index) + if result is None: + raise RuntimeError( + f"Could not find results of '{node.name}' node in a sub-directory " + f"named '{node.checksum}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(node.cache_locations)) + + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. " + f"Current values and hashes: {node.inputs}, " + f"{node.inputs._hashes}\n\n" + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' 
+ "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) or a " + "`__bytes_repr__()` dunder methods to handle one or more types in " + "your interface inputs." + ) + _, split_depth = TypeParser.strip_splits(self.type) + + def get_nested_results(res, depth: int): + if isinstance(res, list): + if not depth: + val = [r.get_output_field(self.field) for r in res] + else: + val = StateArray[self.type]( + get_nested_results(res=r, depth=depth - 1) for r in res + ) + else: + if res.errored: + raise ValueError( + f"Cannot retrieve value for {self.field} from {self.name} as " + "the node errored" + ) + val = res.get_output_field(self.field) + if depth and not wf._pre_split: + assert isinstance(val, ty.Sequence) and not isinstance(val, str) + val = StateArray[self.type](val) + return val + + value = get_nested_results(result, depth=split_depth) + value = self._apply_cast(value) + return value diff --git a/pydra/engine/workflow.py b/pydra/engine/workflow/node.py similarity index 83% rename from pydra/engine/workflow.py rename to pydra/engine/workflow/node.py index 475c60d338..373410b30d 100644 --- a/pydra/engine/workflow.py +++ b/pydra/engine/workflow/node.py @@ -3,13 +3,13 @@ from operator import itemgetter from typing_extensions import Self import attrs -from pydra.design.base import list_fields, TaskSpec, OutputsSpec -from pydra.engine.specs import LazyField, LazyInField, LazyOutField, StateArray from pydra.utils.hash import hash_function -from pydra.utils.typing import TypeParser -from . import helpers_state as hlpst -from .helpers import ensure_list -from . import state +from pydra.utils.typing import TypeParser, StateArray +from . import lazy +from ..specs import TaskSpec, OutputsSpec +from .. import helpers_state as hlpst +from ..helpers import ensure_list, list_fields +from .. import state OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) @@ -28,22 +28,61 @@ class Node(ty.Generic[OutputType]): """ name: str - inputs: TaskSpec[OutputType] - _workflow: "Workflow" = None - _lzout: OutputType | None = None - _state: state.State | None = None - _cont_dim: dict[str, int] | None = ( - None # QUESTION: should this be included in the state? + _spec: TaskSpec[OutputType] + _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False) + _lzout: OutputType | None = attrs.field( + init=False, default=None, eq=False, hash=False ) + _state: state.State | None = attrs.field(init=False, default=None) + _cont_dim: dict[str, int] | None = attrs.field( + init=False, default=None + ) # QUESTION: should this be included in the state? 
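To make the role of this Node wrapper concrete, here is a rough usage sketch pieced together from the tests added later in this series (the task names, workflow name and literal values are illustrative only, not part of the patch):

    from pydra.design import python, workflow
    from pydra.engine.workflow.base import Workflow

    @python.define
    def Mul(x: float, y: float) -> float:
        return x * y

    @python.define
    def Sum(x: list[float]) -> float:
        return sum(x)

    @workflow.define
    def SplitWf(a: list[int], b: list[float]) -> float:
        # workflow.add() wraps each task spec in a Node and returns its lazy outputs;
        # split()/combine() populate the node's State before anything is executed
        mul = workflow.add(Mul()).split(x=a, y=b).combine(["x", "y"])
        return workflow.add(Sum(x=mul.out)).out

    # construct() only builds the node graph, it does not run it
    wf = Workflow.construct(SplitWf(a=[1, 2], b=[10.0, 100.0]))
    print(wf.node_names)  # node names default to the spec class names: ['Mul', 'Sum']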
+ + class Inputs: + """A class to wrap the inputs of a node and control access to them so lazy fields + that will change the downstream state aren't set after the node has been split, + combined or its outputs accessed + """ + + _node: "Node" + + def __init__(self, node: "Node") -> None: + super().__setattr__("_node", node) + + def __getattr__(self, name: str) -> ty.Any: + return getattr(self._node._spec, name) + + def __setattr__(self, name: str, value: ty.Any) -> None: + if isinstance(value, lazy.LazyField): + if self._node.state: + + raise AttributeError( + "Cannot set inputs on a node that has been split or combined" + ) + setattr(self._node._spec, name, value) + + @property + def state(self): + return self._state + + @property + def inputs(self) -> Inputs: + return self.Inputs(self) + + @property + def input_values(self) -> tuple[tuple[str, ty.Any]]: + return tuple(attrs.asdict(self._spec, recurse=False).items()) @property def lzout(self) -> OutputType: + from pydra.engine.helpers import list_fields + """The output spec of the node populated with lazy fields""" if self._lzout is not None: return self._lzout combined_splitter = set() - for inpt_name, inpt_val in attrs.asdict(self.inputs, recurse=False).items(): - if isinstance(inpt_val, LazyField): + for inpt_name, inpt_val in self.input_values: + if isinstance(inpt_val, lazy.LazyField): combined_splitter.update(inpt_val.splits) lazy_fields = {} for field in list_fields(self.inputs.Outputs): @@ -52,8 +91,8 @@ def lzout(self) -> OutputType: # over state values for _ in range(len(combined_splitter)): type_ = StateArray[type_] - lazy_fields[field.name] = LazyOutField( - name=self.name, + lazy_fields[field.name] = lazy.LazyOutField( + node=self, field=field.name, type=type_, splits=frozenset(iter(combined_splitter)), @@ -99,7 +138,7 @@ def split( self : TaskSpec a reference to the task """ - self._check_if_outputs_have_been_used() + self._check_if_outputs_have_been_used("the node cannot be split or combined") if splitter is None and inputs: splitter = list(inputs) elif splitter: @@ -129,7 +168,7 @@ def split( for inpt_name, inpt_val in inputs.items(): new_val: ty.Any if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore - if isinstance(inpt_val, LazyField): + if isinstance(inpt_val, lazy.LazyField): new_val = inpt_val.split(splitter) elif isinstance(inpt_val, ty.Iterable) and not isinstance( inpt_val, (ty.Mapping, str) @@ -143,12 +182,12 @@ def split( new_val = inpt_val new_inputs[inpt_name] = new_val # Update the inputs with the new split values - self.inputs = attrs.evolve(self.inputs, **new_inputs) + self._spec = attrs.evolve(self._spec, **new_inputs) if not self._state or splitter != self._state.splitter: self._set_state(splitter) # Wrap types of lazy outputs in StateArray types - split_depth = len(LazyField.normalize_splitter(splitter)) - outpt_lf: LazyOutField + split_depth = len(lazy.LazyField.normalize_splitter(splitter)) + outpt_lf: lazy.LazyOutField for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): assert not outpt_lf.type_checked outpt_type = outpt_lf.type @@ -185,12 +224,10 @@ def combine( if not isinstance(combiner, (str, list)): raise Exception("combiner has to be a string or a list") combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) - if not_split := [ - c for c in combiner if not any(c in s for s in self._state.splitter) - ]: + if not_split := [c for c in combiner if not any(c in s for s in self.splitter)]: raise ValueError( f"Combiner fields {not_split} for Node {self.name!r} are 
not in the " - f"splitter fields {self._state.splitter}" + f"splitter fields {self.splitter}" ) if ( self._state @@ -207,16 +244,18 @@ def combine( # a task can have a combiner without a splitter # if is connected to one with a splitter; # self.fut_combiner will be used later as a combiner - self._state.fut_combiner = combiner + self._state.fut_combiner = ( + combiner # QUESTION: why separate combiner and fut_combiner? + ) else: # self.state and not self.state.combiner self._set_state(splitter=self._state.splitter, combiner=combiner) # Wrap types of lazy outputs in StateArray types - norm_splitter = LazyField.normalize_splitter(self._state.splitter) + norm_splitter = lazy.LazyField.normalize_splitter(self._state.splitter) remaining_splits = [ s for s in norm_splitter if not any(c in s for c in combiner) ] combine_depth = len(norm_splitter) - len(remaining_splits) - outpt_lf: LazyOutField + outpt_lf: lazy.LazyOutField for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): assert not outpt_lf.type_checked outpt_type, split_depth = TypeParser.strip_splits(outpt_lf.type) @@ -270,16 +309,16 @@ def cont_dim(self, cont_dim): @property def splitter(self): if not self._state: - return None + return () return self._state.splitter @property def combiner(self): if not self._state: - return None + return () return self._state.combiner - def _check_if_outputs_have_been_used(self): + def _check_if_outputs_have_been_used(self, msg): used = [] if self._lzout: for outpt_name, outpt_val in attrs.asdict( @@ -289,8 +328,8 @@ def _check_if_outputs_have_been_used(self): used.append(outpt_name) if used: raise RuntimeError( - f"Outputs {used} of {self} have already been accessed and therefore cannot " - "be split or combined" + f"Outputs {used} of {self} have already been accessed and therefore " + + msg ) @@ -356,7 +395,7 @@ def construct( setattr( lazy_spec, lzy_inpt.name, - LazyInField( + lazy.LazyInField( field=lzy_inpt.name, type=lzy_inpt.type, ), @@ -436,5 +475,7 @@ def under_construction(cls) -> "Workflow[ty.Any]": ) return cls._under_construction + # Used to store the workflow that is currently being constructed _under_construction: "Workflow[ty.Any]" = None + # Used to cache the constructed workflows by their hashed input values _constructed: dict[int, "Workflow[ty.Any]"] = {} diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index c9f1b9b592..decdc81e0f 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -8,14 +8,8 @@ import typing as ty import logging import attr -from pydra.engine.specs import ( - LazyField, - StateArray, - MultiInputObj, - MultiOutputObj, -) from pydra.utils import add_exc_note -from fileformats import field, core +from fileformats import field, core, generic try: from typing import get_origin, get_args @@ -46,6 +40,45 @@ TypeOrAny = ty.Union[type, ty.Any] +# These are special types that are checked for in the construction of input/output specs +# and special converters inserted into the attrs fields. 
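A rough sketch of the intent behind these helper types (based on how they are used elsewhere in this series; not part of the patch hunks themselves):

# MultiInputObj / MultiInputFile: list-like inputs that should also accept a single
# value (the "special converters" mentioned above wrap it into a list).
# MultiOutputObj / MultiOutputFile: outputs that may be either a single object or a
# list of them.
# StateArray: used only at graph-construction time to mark "one value per split
# state" so type-checking can distinguish it from an ordinary list, e.g. a float
# output split over two inputs is exposed downstream as StateArray[StateArray[float]]
# and collapses back to a plain list once combined.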
+ + +class MultiInputObj(list, ty.Generic[T]): + pass + + +MultiInputFile = MultiInputObj[generic.File] + + +# Since we can't create a NewType from a type union, we add a dummy type to the union +# so we can detect the MultiOutput in the input/output spec creation +class MultiOutputType: + pass + + +MultiOutputObj = ty.Union[list, object, MultiOutputType] +MultiOutputFile = ty.Union[generic.File, ty.List[generic.File], MultiOutputType] + +OUTPUT_TEMPLATE_TYPES = ( + Path, + ty.List[Path], + ty.Union[Path, bool], + ty.Union[ty.List[Path], bool], + ty.List[ty.List[Path]], +) + + +class StateArray(ty.List[T]): + """an array of values from, or to be split over in an array of nodes (see TaskBase.split()), + multiple nodes of the same task. Used in type-checking to differentiate between list + types and values for multiple nodes + """ + + def __repr__(self): + return f"{type(self).__name__}(" + ", ".join(repr(i) for i in self) + ")" + + class TypeParser(ty.Generic[T]): """A callable which can be used as a converter for attrs.fields to check whether an object or LazyField matches the specified field type, or can be @@ -159,7 +192,7 @@ def expand_pattern(t): self.superclass_auto_cast = superclass_auto_cast self.match_any_of_union = match_any_of_union - def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: + def __call__(self, obj: ty.Any) -> T: """Attempts to coerce the object to the specified type, unless the value is a LazyField where the type of the field is just checked instead or an attrs.NOTHING where it is simply returned. @@ -180,6 +213,8 @@ def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: if the coercion is not possible, or not specified by the `coercible`/`not_coercible` parameters, then a TypeError is raised """ + from pydra.engine.workflow.lazy import LazyField + coerced: T if obj is attr.NOTHING: coerced = attr.NOTHING # type: ignore[assignment] From 384e57d7780f96768148c4f20e377ebc582f53c3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 5 Dec 2024 14:32:55 +1100 Subject: [PATCH 048/342] fixed up lazy out splitting --- pydra/engine/workflow/lazy.py | 24 ------------------------ pydra/engine/workflow/node.py | 29 ++++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/pydra/engine/workflow/lazy.py b/pydra/engine/workflow/lazy.py index f9d7bbddbb..8fab8723a9 100644 --- a/pydra/engine/workflow/lazy.py +++ b/pydra/engine/workflow/lazy.py @@ -11,7 +11,6 @@ T = ty.TypeVar("T") TypeOrAny = ty.Union[type, ty.Any] -Splitter = ty.Union[str, ty.Tuple[str, ...]] @attrs.define(auto_attribs=True, kw_only=True) @@ -100,29 +99,6 @@ def cast(self, new_type: TypeOrAny) -> Self: # # def combine(self, combiner: str | list[str]) -> Self: - # @classmethod - # def normalize_splitter( - # cls, splitter: Splitter, strip_previous: bool = True - # ) -> ty.Tuple[ty.Tuple[str, ...], ...]: - # """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form - # used in LazyFields""" - # if isinstance(splitter, str): - # splitter = (splitter,) - # if isinstance(splitter, tuple): - # splitter = (splitter,) # type: ignore - # else: - # assert isinstance(splitter, list) - # # convert to frozenset to differentiate from tuple, yet still be hashable - # # (NB: order of fields in list splitters aren't relevant) - # splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) - # # Strip out fields starting with "_" designating splits in upstream nodes - # if strip_previous: - # stripped = tuple( - # tuple(f for f in i if not f.startswith("_")) for i in splitter - # ) - # splitter = tuple(s for s in stripped if s) # type: ignore - # return splitter # type: ignore - def _apply_cast(self, value): """\"Casts\" the value from the retrieved type if a cast has been applied to the lazy-field""" diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 373410b30d..0cf0f8b0d1 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -13,6 +13,7 @@ OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) +Splitter = ty.Union[str, ty.Tuple[str, ...]] @attrs.define @@ -186,7 +187,7 @@ def split( if not self._state or splitter != self._state.splitter: self._set_state(splitter) # Wrap types of lazy outputs in StateArray types - split_depth = len(lazy.LazyField.normalize_splitter(splitter)) + split_depth = len(self._normalize_splitter(splitter)) outpt_lf: lazy.LazyOutField for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): assert not outpt_lf.type_checked @@ -194,7 +195,6 @@ def split( for d in range(split_depth): outpt_type = StateArray[outpt_type] outpt_lf.type = outpt_type - outpt_lf.splits = frozenset(iter(self._state.splitter)) return self def combine( @@ -250,7 +250,7 @@ def combine( else: # self.state and not self.state.combiner self._set_state(splitter=self._state.splitter, combiner=combiner) # Wrap types of lazy outputs in StateArray types - norm_splitter = lazy.LazyField.normalize_splitter(self._state.splitter) + norm_splitter = self._normalize_splitter(self._state.splitter) remaining_splits = [ s for s in norm_splitter if not any(c in s for c in combiner) ] @@ -332,6 +332,29 @@ def _check_if_outputs_have_been_used(self, msg): + msg ) + @classmethod + def _normalize_splitter( + cls, splitter: Splitter, strip_previous: bool = True + ) -> ty.Tuple[ty.Tuple[str, ...], ...]: + """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form + used in LazyFields""" + if isinstance(splitter, str): + splitter = (splitter,) + if isinstance(splitter, tuple): + splitter = (splitter,) # type: ignore + else: + assert isinstance(splitter, list) + # convert to frozenset to differentiate from tuple, yet still be hashable + # (NB: order of fields in list splitters aren't relevant) + splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) + # Strip out fields starting with "_" designating splits in upstream nodes + if strip_previous: + stripped = tuple( + tuple(f for f in i if not f.startswith("_")) for i in splitter + ) + splitter = tuple(s for s in stripped if s) # type: ignore + return splitter # type: ignore + @attrs.define(auto_attribs=False) class Workflow(ty.Generic[OutputType]): From 553bb2fc46ec2034d5a87c9c6b0feae700c4e75e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 6 Dec 2024 13:43:06 +1100 Subject: [PATCH 049/342] fixing up state maintenance in workflow nodes --- pydra/design/tests/test_workflow.py | 77 +++---- pydra/engine/workflow/base.py | 2 +- pydra/engine/workflow/lazy.py | 158 ++++++-------- pydra/engine/workflow/node.py | 327 +++++++++------------------- pydra/utils/typing.py | 4 +- 5 files changed, 208 insertions(+), 360 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 9311ddb601..9f25bf81bd 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -9,18 +9,36 @@ from pydra.engine.specs import TaskSpec from fileformats import video, image +# NB: We use PascalCase for interfaces and workflow functions as it is translated into a class -def test_workflow(): - # NB: We use PascalCase (i.e. class names) as it is translated into a class +@python.define +def Add(a, b): + return a + b - @python.define - def Add(a, b): - return a + b - @python.define - def Mul(a, b): - return a * b +@python.define +def Mul(a, b): + return a * b + + +@python.define(outputs=["divided"]) +def Divide(x, y): + return x / y + + +@python.define +def Sum(x: list[float]) -> float: + return sum(x) + + +def a_converter(value): + if value is attrs.NOTHING: + return value + return float(value) + + +def test_workflow(): @workflow.define def MyTestWorkflow(a, b): @@ -109,7 +127,7 @@ def MyTestShellWorkflow( assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark assert wf.outputs.output_video == LazyOutField( - node=wf["resize"], field="out_video", type=video.Mp4 + node=wf["resize"], field="out_video", type=video.Mp4, type_checked=True ) assert list(wf.node_names) == ["add_watermark", "resize"] @@ -119,19 +137,6 @@ def test_workflow_canonical(): # NB: We use PascalCase (i.e. 
class names) as it is translated into a class - @python.define - def Add(a, b): - return a + b - - @python.define - def Mul(a, b): - return a * b - - def a_converter(value): - if value is attrs.NOTHING: - return value - return float(value) - @workflow.define class MyTestWorkflow(TaskSpec["MyTestWorkflow.Outputs"]): @@ -220,10 +225,10 @@ def MyTestShellWorkflow( ) wf = Workflow.construct(workflow_spec) assert wf["add_watermark"].inputs.in_video == LazyInField( - node=wf, field="input_video", type=video.Mp4 + workflow=wf, field="input_video", type=video.Mp4, type_checked=True ) assert wf["add_watermark"].inputs.watermark == LazyInField( - node=wf, field="watermark", type=image.Png + workflow=wf, field="watermark", type=image.Png, type_checked=True ) @@ -236,10 +241,6 @@ def Add(x, y): def Mul(x, y): return x * y - @python.define(outputs=["divided"]) - def Divide(x, y): - return x / y - @workflow.define(outputs=["out1", "out2"]) def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: """A test workflow demonstration a few alternative ways to set and connect nodes @@ -279,7 +280,9 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=float) + assert wf.outputs.out1 == LazyOutField( + node=wf["Mul"], field="out", type=float, type_checked=True + ) assert wf.outputs.out2 == LazyOutField( node=wf["division"], field="divided", type=ty.Any ) @@ -288,14 +291,6 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: def test_workflow_set_outputs_directly(): - @python.define - def Add(a, b): - return a + b - - @python.define - def Mul(a, b): - return a * b - @workflow.define(outputs={"out1": float, "out2": float}) def MyTestWorkflow(a: int, b: float): @@ -362,10 +357,6 @@ def Mul(x: float, y: float) -> float: def Add(x: float, y: float) -> float: return x + y - @python.define - def Sum(x: list[float]) -> float: - return sum(x) - @workflow.define def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: mul = workflow.add(Mul()).split(x=a, y=b) @@ -387,11 +378,11 @@ def test_workflow_split_after_access_fail(): """ @python.define - def Add(x, y): + def Add(x: float, y: float) -> float: return x + y @python.define - def Mul(x, y): + def Mul(x: float, y: float) -> float: return x * y @workflow.define diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 5026109111..cbfbe6d1c2 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -77,7 +77,7 @@ def construct( lazy_spec, lzy_inpt.name, LazyInField( - node=wf, + workflow=wf, field=lzy_inpt.name, type=lzy_inpt.type, ), diff --git a/pydra/engine/workflow/lazy.py b/pydra/engine/workflow/lazy.py index 8fab8723a9..76f5a07178 100644 --- a/pydra/engine/workflow/lazy.py +++ b/pydra/engine/workflow/lazy.py @@ -1,7 +1,9 @@ import typing as ty +import abc from typing_extensions import Self import attrs from pydra.utils.typing import StateArray +from pydra.utils.hash import hash_single from . import node if ty.TYPE_CHECKING: @@ -13,91 +15,22 @@ TypeOrAny = ty.Union[type, ty.Any] -@attrs.define(auto_attribs=True, kw_only=True) -class LazyField(ty.Generic[T]): +@attrs.define(kw_only=True) +class LazyField(ty.Generic[T], metaclass=abc.ABCMeta): """Lazy fields implement promises.""" - node: node.Node field: str type: TypeOrAny - # Set of splitters that have been applied to the lazy field. 
Note that the splitter - # specifications are transformed to a tuple[tuple[str, ...], ...] form where the - # outer tuple is the outer product, the inner tuple are inner products (where either - # product can be of length==1) - splits: ty.FrozenSet[ty.Tuple[ty.Tuple[str, ...], ...]] = attrs.field( - factory=frozenset, converter=frozenset - ) cast_from: ty.Optional[ty.Type[ty.Any]] = None - # type_checked will be set to False after it is created but defaults to True here for - # ease of testing - type_checked: bool = True + type_checked: bool = False def __bytes_repr__(self, cache): - yield type(self).__name__.encode() - yield self.name.encode() - yield self.field.encode() - - def cast(self, new_type: TypeOrAny) -> Self: - """ "casts" the lazy field to a new type - - Parameters - ---------- - new_type : type - the type to cast the lazy-field to - - Returns - ------- - cast_field : LazyField - a copy of the lazy field with the new type - """ - return type(self)[new_type]( - name=self.name, - field=self.field, - type=new_type, - splits=self.splits, - cast_from=self.cast_from if self.cast_from else self.type, - ) - - # def split(self, splitter: Splitter) -> Self: - # """ "Splits" the lazy field over an array of nodes by replacing the sequence type - # of the lazy field with StateArray to signify that it will be "split" across - - # Parameters - # ---------- - # splitter : str or ty.Tuple[str, ...] or ty.List[str] - # the splitter to append to the list of splitters - # """ - # from pydra.utils.typing import ( - # TypeParser, - # ) # pylint: disable=import-outside-toplevel - - # splits = self.splits | set([LazyField.normalize_splitter(splitter)]) - # # Check to see whether the field has already been split over the given splitter - # if splits == self.splits: - # return self - - # # Modify the type of the lazy field to include the split across a state-array - # inner_type, prev_split_depth = TypeParser.strip_splits(self.type) - # assert prev_split_depth <= 1 - # if inner_type is ty.Any: - # type_ = StateArray[ty.Any] - # elif TypeParser.matches_type(inner_type, list): - # item_type = TypeParser.get_item_type(inner_type) - # type_ = StateArray[item_type] - # else: - # raise TypeError( - # f"Cannot split non-sequence field {self} of type {inner_type}" - # ) - # if prev_split_depth: - # type_ = StateArray[type_] - # return type(self)[type_]( - # name=self.name, - # field=self.field, - # type=type_, - # splits=splits, - # ) - - # # def combine(self, combiner: str | list[str]) -> Self: + yield type(self).__name__.encode() + b"(" + yield from bytes(hash_single(self.source, cache)) + yield b"field=" + self.field.encode() + yield b"type=" + bytes(hash_single(self.type, cache)) + yield b"cast_from=" + bytes(hash_single(self.cast_from, cache)) + yield b")" def _apply_cast(self, value): """\"Casts\" the value from the retrieved type if a cast has been applied to @@ -110,9 +43,11 @@ def _apply_cast(self, value): return value -@attrs.define(auto_attribs=True, kw_only=True) +@attrs.define(kw_only=True) class LazyInField(LazyField[T]): + workflow: "Workflow" = attrs.field() + attr_type = "input" def __eq__(self, other): @@ -120,9 +55,12 @@ def __eq__(self, other): isinstance(other, LazyInField) and self.field == other.field and self.type == other.type - and self.splits == other.splits ) + @property + def source(self): + return self.workflow + def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: """Return the value of a lazy field. 
@@ -155,8 +93,31 @@ def apply_splits(obj, depth): value = self._apply_cast(value) return value + def cast(self, new_type: TypeOrAny) -> Self: + """ "casts" the lazy field to a new type + Parameters + ---------- + new_type : type + the type to cast the lazy-field to + + Returns + ------- + cast_field : LazyInField + a copy of the lazy field with the new type + """ + return type(self)[new_type]( + workflow=self.workflow, + field=self.field, + type=new_type, + cast_from=self.cast_from if self.cast_from else self.type, + ) + + +@attrs.define(kw_only=True) class LazyOutField(LazyField[T]): + + node: node.Node attr_type = "output" def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: @@ -178,16 +139,15 @@ def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty. TypeParser, ) # pylint: disable=import-outside-toplevel - node = getattr(wf, self.name) - result = node.result(state_index=state_index) + result = self.node.result(state_index=state_index) if result is None: raise RuntimeError( - f"Could not find results of '{node.name}' node in a sub-directory " - f"named '{node.checksum}' in any of the cache locations.\n" - + "\n".join(str(p) for p in set(node.cache_locations)) + f"Could not find results of '{self.node.name}' node in a sub-directory " + f"named '{self.node.checksum}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(self.node.cache_locations)) + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. " - f"Current values and hashes: {node.inputs}, " - f"{node.inputs._hashes}\n\n" + f"Current values and hashes: {self.node.inputs}, " + f"{self.node.inputs._hashes}\n\n" "Set loglevel to 'debug' in order to track hash changes " "throughout the execution of the workflow.\n\n " "These issues may have been caused by `bytes_repr()` methods " @@ -224,3 +184,27 @@ def get_nested_results(res, depth: int): value = get_nested_results(result, depth=split_depth) value = self._apply_cast(value) return value + + @property + def source(self): + return self.node + + def cast(self, new_type: TypeOrAny) -> Self: + """ "casts" the lazy field to a new type + + Parameters + ---------- + new_type : type + the type to cast the lazy-field to + + Returns + ------- + cast_field : LazyOutField + a copy of the lazy field with the new type + """ + return type(self)[new_type]( + node=self.node, + field=self.field, + type=new_type, + cast_from=self.cast_from if self.cast_from else self.type, + ) diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 0cf0f8b0d1..7f5b32972a 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -1,20 +1,25 @@ import typing as ty -from copy import copy, deepcopy -from operator import itemgetter -from typing_extensions import Self +from copy import deepcopy +from enum import Enum import attrs -from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from . import lazy from ..specs import TaskSpec, OutputsSpec +from ..helpers import ensure_list from .. import helpers_state as hlpst -from ..helpers import ensure_list, list_fields -from .. 
import state +from ..state import State + +if ty.TYPE_CHECKING: + from .base import Workflow OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) Splitter = ty.Union[str, ty.Tuple[str, ...]] +_not_set = Enum("_not_set", "NOT_SET") + +NOT_SET = _not_set.NOT_SET + @attrs.define class Node(ty.Generic[OutputType]): @@ -34,15 +39,18 @@ class Node(ty.Generic[OutputType]): _lzout: OutputType | None = attrs.field( init=False, default=None, eq=False, hash=False ) - _state: state.State | None = attrs.field(init=False, default=None) + _state: State | None = attrs.field(init=False, default=NOT_SET) _cont_dim: dict[str, int] | None = attrs.field( init=False, default=None ) # QUESTION: should this be included in the state? + _inner_cont_dim: dict[str, int] | None = attrs.field( + init=False, default=None + ) # QUESTION: should this be included in the state? class Inputs: """A class to wrap the inputs of a node and control access to them so lazy fields - that will change the downstream state aren't set after the node has been split, - combined or its outputs accessed + that will change the downstream state (i.e. with new splits) aren't set after + the node has been split, combined or its outputs accessed. """ _node: "Node" @@ -55,21 +63,52 @@ def __getattr__(self, name: str) -> ty.Any: def __setattr__(self, name: str, value: ty.Any) -> None: if isinstance(value, lazy.LazyField): - if self._node.state: - - raise AttributeError( - "Cannot set inputs on a node that has been split or combined" + # Save the current state for comparison later + prev_state = self._node.state + if value.node.state: + # Reset the state to allow the lazy field to be set + self._node._state = NOT_SET + setattr(self._node._spec, name, value) + if value.node.state and self._node.state != prev_state: + self._node._check_if_outputs_have_been_used( + f"cannot set {name!r} input to {value} because it changes the " + f"state of the node from {prev_state} to {value.node.state}" ) - setattr(self._node._spec, name, value) - - @property - def state(self): - return self._state @property def inputs(self) -> Inputs: return self.Inputs(self) + @property + def state(self): + if self._state is not NOT_SET: + return self._state + upstream_states = {} + for inpt_name, val in self.input_values: + if isinstance(val, lazy.LazyOutField) and val.node.state: + node: Node = val.node + # variables that are part of inner splitters should be treated as a containers + if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: + node._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 + # adding task_name: (task.state, [a field from the connection] + if node.name not in upstream_states: + upstream_states[node.name] = (node.state, [inpt_name]) + else: + # if the task already exist in other_state, + # additional field name should be added to the list of fields + upstream_states[node.name][1].append(inpt_name) + if upstream_states: + state = State( + node.name, + splitter=None, + other_states=upstream_states, + combiner=None, + ) + else: + state = None + self._state = state + return state + @property def input_values(self) -> tuple[tuple[str, ty.Any]]: return tuple(attrs.asdict(self._spec, recurse=False).items()) @@ -81,22 +120,12 @@ def lzout(self) -> OutputType: """The output spec of the node populated with lazy fields""" if self._lzout is not None: return self._lzout - combined_splitter = set() - for inpt_name, inpt_val in self.input_values: - if isinstance(inpt_val, lazy.LazyField): - combined_splitter.update(inpt_val.splits) lazy_fields = {} for 
field in list_fields(self.inputs.Outputs): - type_ = field.type - # Wrap types of lazy outputs in StateArray types if the input fields are split - # over state values - for _ in range(len(combined_splitter)): - type_ = StateArray[type_] lazy_fields[field.name] = lazy.LazyOutField( node=self, field=field.name, - type=type_, - splits=frozenset(iter(combined_splitter)), + type=field.type, ) outputs = self.inputs.Outputs(**lazy_fields) # Flag the output lazy fields as being not typed checked (i.e. assigned to another @@ -105,6 +134,7 @@ def lzout(self) -> OutputType: outpt.type_checked = False outputs._node = self self._lzout = outputs + self._wrap_lzout_types_in_state_arrays() return outputs def split( @@ -187,14 +217,7 @@ def split( if not self._state or splitter != self._state.splitter: self._set_state(splitter) # Wrap types of lazy outputs in StateArray types - split_depth = len(self._normalize_splitter(splitter)) - outpt_lf: lazy.LazyOutField - for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): - assert not outpt_lf.type_checked - outpt_type = outpt_lf.type - for d in range(split_depth): - outpt_type = StateArray[outpt_type] - outpt_lf.type = outpt_type + self._wrap_lzout_types_in_state_arrays() return self def combine( @@ -249,24 +272,7 @@ def combine( ) else: # self.state and not self.state.combiner self._set_state(splitter=self._state.splitter, combiner=combiner) - # Wrap types of lazy outputs in StateArray types - norm_splitter = self._normalize_splitter(self._state.splitter) - remaining_splits = [ - s for s in norm_splitter if not any(c in s for c in combiner) - ] - combine_depth = len(norm_splitter) - len(remaining_splits) - outpt_lf: lazy.LazyOutField - for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): - assert not outpt_lf.type_checked - outpt_type, split_depth = TypeParser.strip_splits(outpt_lf.type) - assert split_depth >= combine_depth, ( - f"Attempting to combine a field that has not been split enough times: " - f"{outpt_lf.name} ({outpt_lf.type}), {self._state.splitter} -> {combiner}" - ) - outpt_lf.type = list[outpt_type] - for _ in range(split_depth - combine_depth): - outpt_lf.type = StateArray[outpt_lf.type] - outpt_lf.splits = frozenset(iter(remaining_splits)) + self._wrap_lzout_types_in_state_arrays() return self def _set_state(self, splitter, combiner=None): @@ -284,9 +290,7 @@ def _set_state(self, splitter, combiner=None): task has been run """ if splitter is not None: - self._state = state.State( - name=self.name, splitter=splitter, combiner=combiner - ) + self._state = State(name=self.name, splitter=splitter, combiner=combiner) else: self._state = None return self._state @@ -332,173 +336,40 @@ def _check_if_outputs_have_been_used(self, msg): + msg ) - @classmethod - def _normalize_splitter( - cls, splitter: Splitter, strip_previous: bool = True - ) -> ty.Tuple[ty.Tuple[str, ...], ...]: - """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form - used in LazyFields""" - if isinstance(splitter, str): - splitter = (splitter,) - if isinstance(splitter, tuple): - splitter = (splitter,) # type: ignore - else: - assert isinstance(splitter, list) - # convert to frozenset to differentiate from tuple, yet still be hashable - # (NB: order of fields in list splitters aren't relevant) - splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) - # Strip out fields starting with "_" designating splits in upstream nodes - if strip_previous: - stripped = tuple( - tuple(f for f in i if not f.startswith("_")) for i in splitter - ) - splitter = tuple(s for s in stripped if s) # type: ignore - return splitter # type: ignore - - -@attrs.define(auto_attribs=False) -class Workflow(ty.Generic[OutputType]): - """A workflow, constructed from a workflow specification - - Parameters - ---------- - name : str - The name of the workflow - inputs : TaskSpec - The input specification of the workflow - outputs : TaskSpec - The output specification of the workflow - """ - - name: str = attrs.field() - inputs: TaskSpec[OutputType] = attrs.field() - outputs: OutputType = attrs.field() - _nodes: dict[str, Node] = attrs.field(factory=dict) - - @classmethod - def construct( - cls, - spec: TaskSpec[OutputType], - ) -> Self: - """Construct a workflow from a specification, caching the constructed worklow""" - - lazy_inputs = [f for f in list_fields(type(spec)) if f.lazy] - - # Create a cache key by hashing all the non-lazy input values in the spec - # and use this to store the constructed workflow in case it is reused or nested - # and split over within another workflow - lazy_input_names = {f.name for f in lazy_inputs} - non_lazy_vals = tuple( - sorted( - ( - i - for i in attrs.asdict(spec, recurse=False).items() - if i[0] not in lazy_input_names - ), - key=itemgetter(0), - ) - ) - hash_key = hash_function(non_lazy_vals) - if hash_key in cls._constructed: - return cls._constructed[hash_key] - - # Initialise the outputs of the workflow - outputs = spec.Outputs( - **{f.name: attrs.NOTHING for f in attrs.fields(spec.Outputs)} - ) - - # Initialise the lzin fields - lazy_spec = copy(spec) - wf = cls.under_construction = Workflow( - name=type(spec).__name__, - inputs=lazy_spec, - outputs=outputs, - ) - for lzy_inpt in lazy_inputs: - setattr( - lazy_spec, - lzy_inpt.name, - lazy.LazyInField( - field=lzy_inpt.name, - type=lzy_inpt.type, - ), - ) - - input_values = attrs.asdict(lazy_spec, recurse=False) - constructor = input_values.pop("constructor") - cls._under_construction = wf - try: - # Call the user defined constructor to set the outputs - output_lazy_fields = constructor(**input_values) - # Check to see whether any mandatory inputs are not set - for node in wf.nodes: - node.inputs._check_for_unset_values() - # Check that the outputs are set correctly, either directly by the constructor - # or via returned values that can be zipped with the output names - if output_lazy_fields: - if not isinstance(output_lazy_fields, (list, tuple)): - output_lazy_fields = [output_lazy_fields] - output_fields = list_fields(spec.Outputs) - if len(output_lazy_fields) != len(output_fields): - raise ValueError( - f"Expected {len(output_fields)} outputs, got " - f"{len(output_lazy_fields)} ({output_lazy_fields})" - ) - for outpt, outpt_lf in zip(output_fields, output_lazy_fields): - if TypeParser.get_origin(outpt_lf.type) is StateArray: - # Automatically combine any uncombined state arrays into lists - tp, _ = TypeParser.strip_splits(outpt_lf.type) - outpt_lf.type = list[tp] - 
outpt_lf.splits = frozenset() - setattr(outputs, outpt.name, outpt_lf) - else: - if unset_outputs := [ - a - for a, v in attrs.asdict(outputs, recurse=False).items() - if v is attrs.NOTHING - ]: - raise ValueError( - f"Expected outputs {unset_outputs} to be set by the " - f"constructor of {wf!r}" - ) - finally: - cls._under_construction = None - - cls._constructed[hash_key] = wf - - return wf - - def add(self, task_spec: TaskSpec[OutputType], name=None) -> OutputType: - if name is None: - name = type(task_spec).__name__ - if name in self._nodes: - raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutputType](name=name, inputs=task_spec, workflow=self) - self._nodes[name] = node - return node.lzout - - def __getitem__(self, key: str) -> Node: - return self._nodes[key] - - @property - def nodes(self) -> ty.Iterable[Node]: - return self._nodes.values() - - @property - def node_names(self) -> list[str]: - return list(self._nodes) - - @property - @classmethod - def under_construction(cls) -> "Workflow[ty.Any]": - if cls._under_construction is None: - raise ValueError( - "pydra.design.workflow.this() can only be called from within a workflow " - "constructor function" - ) - return cls._under_construction - - # Used to store the workflow that is currently being constructed - _under_construction: "Workflow[ty.Any]" = None - # Used to cache the constructed workflows by their hashed input values - _constructed: dict[int, "Workflow[ty.Any]"] = {} + def _wrap_lzout_types_in_state_arrays(self) -> None: + """Wraps a types of the lazy out fields in a number of nested StateArray types + based on the number of states the node is split over""" + # Unwrap StateArray types from the output types + if not self.state: + return + outpt_lf: lazy.LazyOutField + state_depth = len(self.state.splitter_rpn) + for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): + assert not outpt_lf.type_checked + type_, _ = TypeParser.strip_splits(outpt_lf.type) + for _ in range(state_depth): + type_ = StateArray[type_] + outpt_lf.type = type_ + + # @classmethod + # def _normalize_splitter( + # cls, splitter: Splitter, strip_previous: bool = True + # ) -> ty.Tuple[ty.Tuple[str, ...], ...]: + # """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] 
form + # used in LazyFields""" + # if isinstance(splitter, str): + # splitter = (splitter,) + # if isinstance(splitter, tuple): + # splitter = (splitter,) # type: ignore + # else: + # assert isinstance(splitter, list) + # # convert to frozenset to differentiate from tuple, yet still be hashable + # # (NB: order of fields in list splitters aren't relevant) + # splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) + # # Strip out fields starting with "_" designating splits in upstream nodes + # if strip_previous: + # stripped = tuple( + # tuple(f for f in i if not f.startswith("_")) for i in splitter + # ) + # splitter = tuple(s for s in stripped if s) # type: ignore + # return splitter # type: ignore diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index decdc81e0f..58249ddbfd 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -251,7 +251,9 @@ def __call__(self, obj: ty.Any) -> T: "coerced to one that is)" ) from e coerced = obj # type: ignore - obj.type_checked = True # Used to check whether the type can be changed + if obj.type is not ty.Any: + # Used to check whether the type of the field can be changed + obj.type_checked = True elif isinstance(obj, StateArray): coerced = StateArray(self(o) for o in obj) # type: ignore[assignment] else: From 032fd4e04886ccdcfffb565317f2c4c06e54ee75 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 09:51:05 +1100 Subject: [PATCH 050/342] restored functionality from specs --- pydra/design/base.py | 39 +- pydra/design/python.py | 8 +- pydra/design/shell.py | 33 +- pydra/design/tests/test_workflow.py | 4 +- pydra/design/workflow.py | 14 +- pydra/engine/core.py | 108 +-- pydra/engine/helpers.py | 19 +- pydra/engine/helpers_file.py | 5 +- pydra/engine/specs.py | 1307 +++++++++++---------------- pydra/engine/task.py | 162 +--- pydra/engine/workflow/base.py | 23 +- pydra/engine/workflow/node.py | 21 +- pydra/utils/typing.py | 4 +- 13 files changed, 629 insertions(+), 1118 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 0fbf79ac82..3f959358af 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -14,6 +14,7 @@ ensure_list, PYDRA_ATTR_METADATA, list_fields, + is_lazy, ) from pydra.utils.typing import ( MultiInputObj, @@ -21,11 +22,10 @@ MultiOutputObj, MultiOutputFile, ) -from pydra.engine.workflow.lazy import LazyField if ty.TYPE_CHECKING: - from pydra.engine.specs import OutputsSpec + from pydra.engine.specs import TaskSpec, OutSpec from pydra.engine.core import Task __all__ = [ @@ -84,7 +84,9 @@ class Field: validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) ) help_string: str = "" - requires: list | None = None + requires: list[str] | list[list[str]] = attrs.field( + factory=list, converter=ensure_list + ) converter: ty.Callable | None = None validator: ty.Callable | None = None @@ -240,6 +242,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_spec( + spec_type: type["TaskSpec"], + out_type: type["OutSpec"], task_type: type["Task"], inputs: dict[str, Arg], outputs: dict[str, Out], @@ -281,14 +285,16 @@ def make_task_spec( if name is None and klass is not None: name = klass.__name__ - outputs_klass = make_outputs_spec(outputs, outputs_bases, name) - if klass is None or not issubclass(klass, TaskSpec): + outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) + if klass is None or not issubclass(klass, spec_type): if name is None: raise ValueError("name must be provided if klass is not") + if 
klass is not None and issubclass(klass, TaskSpec): + raise ValueError(f"Cannot change type of spec {klass} to {spec_type}") bases = tuple(bases) # Ensure that TaskSpec is a base class - if not any(issubclass(b, TaskSpec) for b in bases): - bases = bases + (TaskSpec,) + if not any(issubclass(b, spec_type) for b in bases): + bases = bases + (spec_type,) # If building from a decorated class (as opposed to dynamically from a function # or shell-template), add any base classes not already in the bases tuple if klass is not None: @@ -346,8 +352,11 @@ def make_task_spec( def make_outputs_spec( - outputs: dict[str, Out], bases: ty.Sequence[type], spec_name: str -) -> type["OutputsSpec"]: + spec_type: type["OutSpec"], + outputs: dict[str, Out], + bases: ty.Sequence[type], + spec_name: str, +) -> type["OutSpec"]: """Create an outputs specification class and its outputs specification class from the output fields provided to the decorator/function. @@ -368,10 +377,14 @@ def make_outputs_spec( klass : type The class created using the attrs package """ - from pydra.engine.specs import OutputsSpec + from pydra.engine.specs import OutSpec - if not any(issubclass(b, OutputsSpec) for b in bases): - outputs_bases = bases + (OutputsSpec,) + if not any(issubclass(b, spec_type) for b in bases): + if out_spec_bases := [b for b in bases if issubclass(b, OutSpec)]: + raise ValueError( + f"Cannot make {spec_type} output spec from {out_spec_bases} bases" + ) + outputs_bases = bases + (spec_type,) if reserved_names := [n for n in outputs if n in RESERVED_OUTPUT_NAMES]: raise ValueError( f"{reserved_names} are reserved and cannot be used for output field names" @@ -549,7 +562,7 @@ def make_validator(field: Field, interface_name: str) -> ty.Callable[..., None] def allowed_values_validator(_, attribute, value): """checking if the values is in allowed_values""" allowed = attribute.metadata[PYDRA_ATTR_METADATA].allowed_values - if value is attrs.NOTHING or isinstance(value, LazyField): + if value is attrs.NOTHING or is_lazy(value): pass elif value not in allowed: raise ValueError( diff --git a/pydra/design/python.py b/pydra/design/python.py index b25d36e010..9de6860e1d 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -2,7 +2,7 @@ import inspect import attrs from pydra.engine.task import FunctionTask -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import PythonSpec, PythonOutSpec from .base import ( Arg, Out, @@ -87,7 +87,7 @@ def define( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, -) -> TaskSpec: +) -> PythonSpec: """ Create an interface for a function or a class. @@ -103,7 +103,7 @@ def define( Whether to use auto_attribs mode when creating the class. 
""" - def make(wrapped: ty.Callable | type) -> TaskSpec: + def make(wrapped: ty.Callable | type) -> PythonSpec: if inspect.isclass(wrapped): klass = wrapped function = klass.function @@ -139,6 +139,8 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: ) interface = make_task_spec( + PythonSpec, + PythonOutSpec, FunctionTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 21d5d435c9..6587608960 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -11,7 +11,7 @@ from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import ShellSpec, ShellOutSpec from .base import ( Arg, Out, @@ -177,9 +177,8 @@ class outarg(Out, arg): inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). path_template: str, optional - If provided, the field is treated also as an output field and it is added to - the output spec. The template can use other fields, e.g. {file1}. Used in order - to create an output specification. + The template used to specify where the output file will be written to can use + other fields, e.g. {file1}. Used in order to create an output specification. """ path_template: str | None = attrs.field(default=None) @@ -202,7 +201,7 @@ def define( outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, name: str | None = None, -) -> TaskSpec: +) -> ShellSpec: """Create a task specification for a shell command. Can be used either as a decorator on the "canonical" dataclass-form of a task specification or as a function that takes a "shell-command template string" of the form @@ -251,13 +250,13 @@ def define( Returns ------- - TaskSpec + ShellSpec The interface for the shell command """ def make( wrapped: ty.Callable | type | None = None, - ) -> TaskSpec: + ) -> ShellSpec: if inspect.isclass(wrapped): klass = wrapped @@ -272,6 +271,14 @@ def make( f"Shell task class {wrapped} must have an `executable` " "attribute that specifies the command to run" ) from None + if not isinstance(executable, str) and not ( + isinstance(executable, ty.Sequence) + and all(isinstance(e, str) for e in executable) + ): + raise ValueError( + "executable must be a string or a sequence of strings" + f", not {executable!r}" + ) class_name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) parsed_inputs, parsed_outputs = extract_fields_from_class( @@ -309,7 +316,15 @@ def make( {o.name: o for o in parsed_outputs.values() if isinstance(o, arg)} ) parsed_inputs["executable"] = arg( - name="executable", type=str, argstr="", position=0, default=executable + name="executable", + type=str | ty.Sequence[str], + argstr="", + position=0, + default=executable, + help_string=( + "the first part of the command, can be a string, " + "e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']" + ), ) # Set positions for the remaining inputs that don't have an explicit position @@ -319,6 +334,8 @@ def make( inpt.position = position_stack.pop(0) interface = make_task_spec( + ShellSpec, + ShellOutSpec, ShellCommandTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 9f25bf81bd..f4c1c0c19d 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -344,7 +344,9 @@ def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] assert wf["Mul"].combiner == ["Mul.x"] - assert wf.outputs.out == LazyOutField(node=wf["Sum"], field="out", type=list[float]) + assert wf.outputs.out == LazyOutField( + node=wf["Sum"], field="out", type=list[float], type_checked=True + ) def test_workflow_split_combine2(): diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 75ac13197f..86a9f3ca9a 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -13,7 +13,7 @@ check_explicit_fields_are_none, extract_fields_from_class, ) -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import TaskSpec, OutSpec, WorkflowSpec, WorkflowOutSpec __all__ = ["define", "add", "this", "arg", "out"] @@ -154,6 +154,8 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: parsed_inputs[inpt_name].lazy = True interface = make_task_spec( + WorkflowSpec, + WorkflowOutSpec, WorkflowTask, parsed_inputs, parsed_outputs, @@ -172,9 +174,6 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: return make -OutputType = ty.TypeVar("OutputType") - - def this() -> Workflow: """Get the workflow currently being constructed. @@ -186,7 +185,10 @@ def this() -> Workflow: return Workflow.under_construction -def add(task_spec: TaskSpec[OutputType], name: str = None) -> OutputType: +OutSpecType = ty.TypeVar("OutSpecType", bound=OutSpec) + + +def add(task_spec: TaskSpec[OutSpecType], name: str = None) -> OutSpecType: """Add a node to the workflow currently being constructed Parameters @@ -199,7 +201,7 @@ def add(task_spec: TaskSpec[OutputType], name: str = None) -> OutputType: Returns ------- - OutputType + OutSpec The outputs specification of the node """ return this().add(task_spec, name=name) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 4607e23f71..7cf35f455f 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -3,8 +3,6 @@ import abc import json import logging -import itertools -from functools import cached_property import os import sys from pathlib import Path @@ -21,16 +19,12 @@ from . 
import helpers_state as hlpst from .specs import ( File, - # BaseSpec, RuntimeSpec, Result, - # SpecInfo, - # LazyIn, - # LazyOut, TaskHook, ) +from .workflow.lazy import is_lazy from .helpers import ( - # make_klass, create_checksum, attr_fields, print_help, @@ -138,8 +132,6 @@ def __init__( self.interface = spec # raise error if name is same as of attributes - if name in dir(self): - raise ValueError("Cannot use names of attributes or methods as task name") self.name = name if not self.input_spec: raise Exception("No input_spec in class: %s" % self.__class__.__name__) @@ -227,10 +219,6 @@ def __setstate__(self, state): state["inputs"] = self.interface(**state["inputs"]) self.__dict__.update(state) - @cached_property - def lzout(self): - return LazyOut(self) - def help(self, returnhelp=False): """Print class help.""" help_obj = print_help(self) @@ -818,80 +806,6 @@ def _check_for_hash_changes(self): DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any -def _sanitize_spec( - spec: ty.Union[ty.List[str], ty.Dict[str, ty.Type[ty.Any]], None], - wf_name: str, - spec_name: str, - allow_empty: bool = False, -): - """Makes sure the provided input specifications are valid. - - If the input specification is a list of strings, this will - build a proper SpecInfo object out of it. - - Parameters - ---------- - spec : SpecInfo or List[str] or Dict[str, type] - Specification to be sanitized. - wf_name : str - The name of the workflow for which the input specifications - spec_name : str - name given to generated SpecInfo object - - Returns - ------- - spec : SpecInfo - Sanitized specification. - - Raises - ------ - ValueError - If provided `spec` is None. - """ - graph_checksum_input = ("_graph_checksums", ty.Any) - if spec: - if isinstance(spec, SpecInfo): - if BaseSpec not in spec.bases: - raise ValueError("Provided SpecInfo must have BaseSpec as its base.") - if "_graph_checksums" not in {f[0] for f in spec.fields}: - spec.fields.insert(0, graph_checksum_input) - return spec - else: - base = BaseSpec - if isinstance(spec, list): - typed_spec = zip(spec, itertools.repeat(ty.Any)) - elif isinstance(spec, dict): - typed_spec = spec.items() # type: ignore - elif isinstance(spec, BaseSpec): - base = spec - typed_spec = [] - else: - raise TypeError( - f"Unrecognised spec type, {spec}, should be SpecInfo, list or dict" - ) - return SpecInfo( - name=spec_name, - fields=[graph_checksum_input] - + [ - ( - nm, - attr.ib( - type=tp, - metadata={ - "help_string": f"{nm} input from {wf_name} workflow" - }, - ), - ) - for nm, tp in typed_spec - ], - bases=(base,), - ) - elif allow_empty: - return None - else: - raise ValueError(f'Empty "{spec_name}" spec provided to Workflow {wf_name}.') - - class WorkflowTask(Task): """A composite task with structure of computational graph.""" @@ -939,10 +853,6 @@ def __init__( TODO """ - self.input_spec = _sanitize_spec(input_spec, name, "Inputs") - self.output_spec = _sanitize_spec( - output_spec, name, "Outputs", allow_empty=True - ) if name in dir(self): raise ValueError( @@ -974,10 +884,6 @@ def __init__( # propagating rerun if task_rerun=True self.propagate_rerun = propagate_rerun - @cached_property - def lzin(self): - return LazyIn(self) - def __getattr__(self, name): if name in self.name2obj: return self.name2obj[name] @@ -1075,7 +981,7 @@ def create_connections(self, task, detailed=False): other_states = {} for field in attr_fields(task.inputs): val = getattr(task.inputs, field.name) - if isinstance(val, LazyField): + if is_lazy(val): # saving all connections with 
LazyFields task.inp_lf[field.name] = val # adding an edge to the graph if task id expecting output from a different task @@ -1292,7 +1198,7 @@ def _collect_outputs(self): # collecting outputs from tasks output_wf = {} for name, val in self._connections: - if not isinstance(val, LazyField): + if not is_lazy(val): raise ValueError("all connections must be lazy") try: val_out = val.get_value(self) @@ -1395,11 +1301,3 @@ def is_task(obj): def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" return isinstance(obj, WorkflowTask) - - -def is_lazy(obj): - """Check whether an object has any field that is a Lazy Field""" - for f in attr_fields(obj): - if isinstance(getattr(obj, f.name), LazyField): - return True - return False diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 92efc9de53..dc85205521 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -45,11 +45,10 @@ def list_fields(interface: "TaskSpec") -> list["Field"]: def from_list_if_single(obj): """Converts a list to a single item if it is of length == 1""" - from pydra.engine.workflow.lazy import LazyField if obj is attrs.NOTHING: return obj - if isinstance(obj, LazyField): + if is_lazy(obj): return obj obj = list(obj) if len(obj) == 1: @@ -637,7 +636,6 @@ def ensure_list(obj, tuple2list=False): [5.0] """ - from pydra.engine.workflow.lazy import LazyField if obj is attrs.NOTHING: return attrs.NOTHING @@ -648,6 +646,19 @@ def ensure_list(obj, tuple2list=False): return obj elif tuple2list and isinstance(obj, tuple): return list(obj) - elif isinstance(obj, LazyField): + elif is_lazy(obj): return obj return [obj] + + +def is_lazy(obj): + """Check whether an object has any field that is a Lazy Field""" + from pydra.engine.workflow.lazy import LazyField + + if is_lazy(obj): + return True + + for f in attr_fields(obj): + if isinstance(getattr(obj, f.name), LazyField): + return True + return False diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 8be955b20e..f846e40db2 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -10,6 +10,7 @@ from contextlib import contextmanager import attr from fileformats.core import FileSet +from pydra.engine.helpers import is_lazy logger = logging.getLogger("pydra") @@ -151,7 +152,7 @@ def template_update_single( # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object from pydra.utils.typing import TypeParser # noqa - from pydra.engine.specs import LazyField, OUTPUT_TEMPLATE_TYPES + from pydra.engine.specs import OUTPUT_TEMPLATE_TYPES if inputs_dict_st is None: inputs_dict_st = attr.asdict(inputs, recurse=False) @@ -162,7 +163,7 @@ def template_update_single( raise TypeError( f"type of '{field.name}' is Path, consider using Union[Path, bool]" ) - if inp_val_set is not attr.NOTHING and not isinstance(inp_val_set, LazyField): + if inp_val_set is not attr.NOTHING and not is_lazy(inp_val_set): inp_val_set = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(inp_val_set) elif spec_type == "output": if not TypeParser.contains_type(FileSet, field.type): diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 0cdc4f07f2..4b45e9cf7b 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1,805 +1,22 @@ """Task I/O specifications.""" +import os from pathlib import Path +import re +import inspect import typing as ty - -# import inspect -# import re -# import os -from pydra.engine.audit import AuditFlag - -# from glob import glob -import 
attrs +from glob import glob from typing_extensions import Self - -# from fileformats.core import FileSet -from fileformats.generic import ( - File, - # Directory, -) -from .helpers import attr_fields - -# from .helpers_file import template_update_single -# from pydra.utils.hash import hash_function, Cache - -# from pydra.utils.misc import add_exc_note - - -# @attrs.define(auto_attribs=True, kw_only=True) -# class SpecInfo: -# """Base data structure for metadata of specifications.""" - -# name: str -# """A name for the specification.""" -# fields: ty.List[ty.Tuple] = attrs.field(factory=list) -# """List of names of fields (can be inputs or outputs).""" -# bases: ty.Sequence[ty.Type["BaseSpec"]] = attrs.field(factory=tuple) -# """Keeps track of specification inheritance. -# Should be a tuple containing at least one BaseSpec """ - - -# @attrs.define(auto_attribs=True, kw_only=True) -# class BaseSpec: -# """The base dataclass specs for all inputs and outputs.""" - -# def collect_additional_outputs(self, inputs, output_dir, outputs): -# """Get additional outputs.""" -# return {} - -# @property -# def hash(self): -# hsh, self._hashes = self._compute_hashes() -# return hsh - -# def hash_changes(self): -# """Detects any changes in the hashed values between the current inputs and the -# previously calculated values""" -# _, new_hashes = self._compute_hashes() -# return [k for k, v in new_hashes.items() if v != self._hashes[k]] - -# def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: -# """Compute a basic hash for any given set of fields.""" -# inp_dict = {} -# for field in attr_fields( -# self, exclude_names=("_graph_checksums", "bindings", "files_hash") -# ): -# if field.metadata.get("output_file_template"): -# continue -# # removing values that are not set from hash calculation -# if getattr(self, field.name) is attrs.NOTHING: -# continue -# if "container_path" in field.metadata: -# continue -# inp_dict[field.name] = getattr(self, field.name) -# hash_cache = Cache() -# field_hashes = { -# k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() -# } -# if hasattr(self, "_graph_checksums"): -# field_hashes["_graph_checksums"] = self._graph_checksums -# return hash_function(sorted(field_hashes.items())), field_hashes - -# def retrieve_values(self, wf, state_index: ty.Optional[int] = None): -# """Get values contained by this spec.""" -# retrieved_values = {} -# for field in attr_fields(self): -# value = getattr(self, field.name) -# if isinstance(value, LazyField): -# retrieved_values[field.name] = value.get_value( -# wf, state_index=state_index -# ) -# for field, val in retrieved_values.items(): -# setattr(self, field, val) - -# def check_fields_input_spec(self): -# """ -# Check fields from input spec based on the medatada. - -# e.g., if xor, requires are fulfilled, if value provided when mandatory. - -# """ -# fields = attr_fields(self) - -# for field in fields: -# field_is_mandatory = bool(field.metadata.get("mandatory")) -# field_is_unset = getattr(self, field.name) is attrs.NOTHING - -# if field_is_unset and not field_is_mandatory: -# continue - -# # Collect alternative fields associated with this field. -# alternative_fields = { -# name: getattr(self, name) is not attrs.NOTHING -# for name in field.metadata.get("xor", []) -# if name != field.name -# } -# alternatives_are_set = any(alternative_fields.values()) - -# # Raise error if no field in mandatory alternative group is set. 
-# if field_is_unset: -# if alternatives_are_set: -# continue -# message = f"{field.name} is mandatory and unset." -# if alternative_fields: -# raise AttributeError( -# message[:-1] -# + f", but no alternative provided by {list(alternative_fields)}." -# ) -# else: -# raise AttributeError(message) - -# # Raise error if multiple alternatives are set. -# elif alternatives_are_set: -# set_alternative_fields = [ -# name for name, is_set in alternative_fields.items() if is_set -# ] -# raise AttributeError( -# f"{field.name} is mutually exclusive with {set_alternative_fields}" -# ) - -# # Collect required fields associated with this field. -# required_fields = { -# name: getattr(self, name) is not attrs.NOTHING -# for name in field.metadata.get("requires", []) -# if name != field.name -# } - -# # Raise error if any required field is unset. -# if not all(required_fields.values()): -# unset_required_fields = [ -# name for name, is_set in required_fields.items() if not is_set -# ] -# raise AttributeError(f"{field.name} requires {unset_required_fields}") - -# def check_metadata(self): -# """Check contained metadata.""" - -# def template_update(self): -# """Update template.""" - -# def copyfile_input(self, output_dir): -# """Copy the file pointed by a :class:`File` input.""" - - -@attrs.define(auto_attribs=True, kw_only=True) -class Runtime: - """Represent run time metadata.""" - - rss_peak_gb: ty.Optional[float] = None - """Peak in consumption of physical RAM.""" - vms_peak_gb: ty.Optional[float] = None - """Peak in consumption of virtual memory.""" - cpu_peak_percent: ty.Optional[float] = None - """Peak in cpu consumption.""" - - -@attrs.define(auto_attribs=True, kw_only=True) -class Result: - """Metadata regarding the outputs of processing.""" - - output: ty.Optional[ty.Any] = None - runtime: ty.Optional[Runtime] = None - errored: bool = False - - def __getstate__(self): - state = self.__dict__.copy() - if state["output"] is not None: - fields = tuple((el.name, el.type) for el in attr_fields(state["output"])) - state["output_spec"] = (state["output"].__class__.__name__, fields) - state["output"] = attrs.asdict(state["output"], recurse=False) - return state - - def __setstate__(self, state): - if "output_spec" in state: - spec = list(state["output_spec"]) - del state["output_spec"] - klass = attrs.make_class( - spec[0], {k: attrs.field(type=v) for k, v in list(spec[1])} - ) - state["output"] = klass(**state["output"]) - self.__dict__.update(state) - - def get_output_field(self, field_name): - """Used in get_values in Workflow - - Parameters - ---------- - field_name : `str` - Name of field in LazyField object - """ - if field_name == "all_": - return attrs.asdict(self.output, recurse=False) - else: - return getattr(self.output, field_name) - - -@attrs.define(auto_attribs=True, kw_only=True) -class RuntimeSpec: - """ - Specification for a task. - - From CWL:: - - InlineJavascriptRequirement - SchemaDefRequirement - DockerRequirement - SoftwareRequirement - InitialWorkDirRequirement - EnvVarRequirement - ShellCommandRequirement - ResourceRequirement - - InlineScriptRequirement - - """ - - outdir: ty.Optional[str] = None - container: ty.Optional[str] = "shell" - network: bool = False - - -# @attrs.define(auto_attribs=True, kw_only=True) -# class FunctionSpec(BaseSpec): -# """Specification for a process invoked from a shell.""" - -# def check_metadata(self): -# """ -# Check the metadata for fields in input_spec and fields. - -# Also sets the default values when available and needed. 
- -# """ -# supported_keys = { -# "allowed_values", -# "copyfile", -# "help_string", -# "mandatory", -# # "readonly", #likely not needed -# # "output_field_name", #likely not needed -# # "output_file_template", #likely not needed -# "requires", -# "keep_extension", -# "xor", -# "sep", -# } -# for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): -# mdata = fld.metadata -# # checking keys from metadata -# if set(mdata.keys()) - supported_keys: -# raise AttributeError( -# f"only these keys are supported {supported_keys}, but " -# f"{set(mdata.keys()) - supported_keys} provided" -# ) -# # checking if the help string is provided (required field) -# if "help_string" not in mdata: -# raise AttributeError(f"{fld.name} doesn't have help_string field") -# # not allowing for default if the field is mandatory -# if not fld.default == attrs.NOTHING and mdata.get("mandatory"): -# raise AttributeError( -# f"default value ({fld.default!r}) should not be set when the field " -# f"('{fld.name}') in {self}) is mandatory" -# ) -# # setting default if value not provided and default is available -# if getattr(self, fld.name) is None: -# if not fld.default == attrs.NOTHING: -# setattr(self, fld.name, fld.default) - - -# @attrs.define(auto_attribs=True, kw_only=True) -# class ShellSpec(BaseSpec): -# """Specification for a process invoked from a shell.""" - -# executable: ty.Union[str, ty.List[str]] = attrs.field( -# metadata={ -# "help_string": "the first part of the command, can be a string, " -# "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" -# } -# ) -# args: ty.Union[str, ty.List[str], None] = attrs.field( -# default=None, -# metadata={ -# "help_string": "the last part of the command, can be a string, " -# "e.g. , or a list" -# }, -# ) - -# def retrieve_values(self, wf, state_index=None): -# """Parse output results.""" -# temp_values = {} -# for field in attr_fields(self): -# # retrieving values that do not have templates -# if not field.metadata.get("output_file_template"): -# value = getattr(self, field.name) -# if isinstance(value, LazyField): -# temp_values[field.name] = value.get_value( -# wf, state_index=state_index -# ) -# for field, val in temp_values.items(): -# value = path_to_string(value) -# setattr(self, field, val) - -# def check_metadata(self): -# """ -# Check the metadata for fields in input_spec and fields. - -# Also sets the default values when available and needed. 
- -# """ -# from pydra.utils.typing import TypeParser - -# supported_keys = { -# "allowed_values", -# "argstr", -# "container_path", -# "copyfile", -# "help_string", -# "mandatory", -# "readonly", -# "output_field_name", -# "output_file_template", -# "position", -# "requires", -# "keep_extension", -# "xor", -# "sep", -# "formatter", -# "_output_type", -# } - -# for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): -# mdata = fld.metadata -# # checking keys from metadata -# if set(mdata.keys()) - supported_keys: -# raise AttributeError( -# f"only these keys are supported {supported_keys}, but " -# f"{set(mdata.keys()) - supported_keys} provided for '{fld.name}' " -# f"field in {self}" -# ) -# # checking if the help string is provided (required field) -# if "help_string" not in mdata: -# raise AttributeError( -# f"{fld.name} doesn't have help_string field in {self}" -# ) -# # assuming that fields with output_file_template shouldn't have default -# if mdata.get("output_file_template"): -# if not any( -# TypeParser.matches_type(fld.type, t) for t in OUTPUT_TEMPLATE_TYPES -# ): -# raise TypeError( -# f"Type of '{fld.name}' should be one of {OUTPUT_TEMPLATE_TYPES} " -# f"(not {fld.type}) because it has a value for output_file_template " -# f"({mdata['output_file_template']!r})" -# ) -# if fld.default not in [attrs.NOTHING, True, False]: -# raise AttributeError( -# f"default value ({fld.default!r}) should not be set together with " -# f"output_file_template ({mdata['output_file_template']!r}) for " -# f"'{fld.name}' field in {self}" -# ) -# # not allowing for default if the field is mandatory -# if not fld.default == attrs.NOTHING and mdata.get("mandatory"): -# raise AttributeError( -# f"default value ({fld.default!r}) should not be set when the field " -# f"('{fld.name}') in {self}) is mandatory" -# ) -# # setting default if value not provided and default is available -# if getattr(self, fld.name) is None: -# if not fld.default == attrs.NOTHING: -# setattr(self, fld.name, fld.default) - - -# @attrs.define(auto_attribs=True, kw_only=True) -# class ShellOutSpec: -# """Output specification of a generic shell process.""" - -# return_code: int -# """The process' exit code.""" -# stdout: str -# """The process' standard output.""" -# stderr: str -# """The process' standard input.""" - -# def collect_additional_outputs(self, inputs, output_dir, outputs): -# from pydra.utils.typing import TypeParser - -# """Collect additional outputs from shelltask output_spec.""" -# additional_out = {} -# for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): -# if not TypeParser.is_subclass( -# fld.type, -# ( -# os.PathLike, -# MultiOutputObj, -# int, -# float, -# bool, -# str, -# list, -# ), -# ): -# raise TypeError( -# f"Support for {fld.type} type, required for '{fld.name}' in {self}, " -# "has not been implemented in collect_additional_output" -# ) -# # assuming that field should have either default or metadata, but not both -# input_value = getattr(inputs, fld.name, attrs.NOTHING) -# if fld.metadata and "callable" in fld.metadata: -# fld_out = self._field_metadata(fld, inputs, output_dir, outputs) -# elif fld.type in [int, float, bool, str, list]: -# raise AttributeError(f"{fld.type} has to have a callable in metadata") -# elif input_value: # Map input value through to output -# fld_out = input_value -# elif fld.default != attrs.NOTHING: -# fld_out = self._field_defaultvalue(fld, output_dir) -# else: -# raise AttributeError("File has to have default value or 
metadata") -# if TypeParser.contains_type(FileSet, fld.type): -# label = f"output field '{fld.name}' of {self}" -# fld_out = TypeParser(fld.type, label=label).coerce(fld_out) -# additional_out[fld.name] = fld_out -# return additional_out - -# def generated_output_names(self, inputs, output_dir): -# """Returns a list of all outputs that will be generated by the task. -# Takes into account the task input and the requires list for the output fields. -# TODO: should be in all Output specs? -# """ -# # checking the input (if all mandatory fields are provided, etc.) -# inputs.check_fields_input_spec() -# output_names = ["return_code", "stdout", "stderr"] -# for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): -# if fld.type not in [File, MultiOutputFile, Directory]: -# raise Exception("not implemented (collect_additional_output)") -# # assuming that field should have either default or metadata, but not both -# if ( -# fld.default in (None, attrs.NOTHING) and not fld.metadata -# ): # TODO: is it right? -# raise AttributeError("File has to have default value or metadata") -# elif fld.default != attrs.NOTHING: -# output_names.append(fld.name) -# elif ( -# fld.metadata -# and self._field_metadata( -# fld, inputs, output_dir, outputs=None, check_existance=False -# ) -# != attrs.NOTHING -# ): -# output_names.append(fld.name) -# return output_names - -# def _field_defaultvalue(self, fld, output_dir): -# """Collect output file if the default value specified.""" -# if not isinstance(fld.default, (str, Path)): -# raise AttributeError( -# f"{fld.name} is a File, so default value " -# f"should be a string or a Path, " -# f"{fld.default} provided" -# ) -# default = fld.default -# if isinstance(default, str): -# default = Path(default) - -# default = output_dir / default -# if "*" not in str(default): -# if default.exists(): -# return default -# else: -# raise AttributeError(f"file {default} does not exist") -# else: -# all_files = [Path(el) for el in glob(str(default.expanduser()))] -# if len(all_files) > 1: -# return all_files -# elif len(all_files) == 1: -# return all_files[0] -# else: -# raise AttributeError(f"no file matches {default.name}") - -# def _field_metadata( -# self, fld, inputs, output_dir, outputs=None, check_existance=True -# ): -# """Collect output file if metadata specified.""" -# if self._check_requires(fld, inputs) is False: -# return attrs.NOTHING - -# if "value" in fld.metadata: -# return output_dir / fld.metadata["value"] -# # this block is only run if "output_file_template" is provided in output_spec -# # if the field is set in input_spec with output_file_template, -# # than the field already should have value -# elif "output_file_template" in fld.metadata: -# value = template_update_single( -# fld, inputs=inputs, output_dir=output_dir, spec_type="output" -# ) - -# if fld.type is MultiOutputFile and type(value) is list: -# # TODO: how to deal with mandatory list outputs -# ret = [] -# for val in value: -# val = Path(val) -# if check_existance and not val.exists(): -# ret.append(attrs.NOTHING) -# else: -# ret.append(val) -# return ret -# else: -# val = Path(value) -# # checking if the file exists -# if check_existance and not val.exists(): -# # if mandatory raise exception -# if "mandatory" in fld.metadata: -# if fld.metadata["mandatory"]: -# raise Exception( -# f"mandatory output for variable {fld.name} does not exist" -# ) -# return attrs.NOTHING -# return val -# elif "callable" in fld.metadata: -# callable_ = fld.metadata["callable"] -# if 
isinstance(callable_, staticmethod): -# # In case callable is defined as a static method, -# # retrieve the function wrapped in the descriptor. -# callable_ = callable_.__func__ -# call_args = inspect.getfullargspec(callable_) -# call_args_val = {} -# for argnm in call_args.args: -# if argnm == "field": -# call_args_val[argnm] = fld -# elif argnm == "output_dir": -# call_args_val[argnm] = output_dir -# elif argnm == "inputs": -# call_args_val[argnm] = inputs -# elif argnm == "stdout": -# call_args_val[argnm] = outputs["stdout"] -# elif argnm == "stderr": -# call_args_val[argnm] = outputs["stderr"] -# else: -# try: -# call_args_val[argnm] = getattr(inputs, argnm) -# except AttributeError: -# raise AttributeError( -# f"arguments of the callable function from {fld.name} " -# f"has to be in inputs or be field or output_dir, " -# f"but {argnm} is used" -# ) -# return callable_(**call_args_val) -# else: -# raise Exception( -# f"Metadata for '{fld.name}', does not not contain any of the required fields " -# f'("callable", "output_file_template" or "value"): {fld.metadata}.' -# ) - -# def _check_requires(self, fld, inputs): -# """checking if all fields from the requires and template are set in the input -# if requires is a list of list, checking if at least one list has all elements set -# """ -# from .helpers import ensure_list - -# if "requires" in fld.metadata: -# # if requires is a list of list it is treated as el[0] OR el[1] OR... -# required_fields = ensure_list(fld.metadata["requires"]) -# if all([isinstance(el, list) for el in required_fields]): -# field_required_OR = required_fields -# # if requires is a list of tuples/strings - I'm creating a 1-el nested list -# elif all([isinstance(el, (str, tuple)) for el in required_fields]): -# field_required_OR = [required_fields] -# else: -# raise Exception( -# f"requires field can be a list of list, or a list " -# f"of strings/tuples, but {fld.metadata['requires']} " -# f"provided for {fld.name}" -# ) -# else: -# field_required_OR = [[]] - -# for field_required in field_required_OR: -# # if the output has output_file_template field, -# # adding all input fields from the template to requires -# if "output_file_template" in fld.metadata: -# template = fld.metadata["output_file_template"] -# # if a template is a function it has to be run first with the inputs as the only arg -# if callable(template): -# template = template(inputs) -# inp_fields = re.findall(r"{\w+}", template) -# field_required += [ -# el[1:-1] for el in inp_fields if el[1:-1] not in field_required -# ] - -# # it's a flag, of the field from the list is not in input it will be changed to False -# required_found = True -# for field_required in field_required_OR: -# required_found = True -# # checking if the input fields from requires have set values -# for inp in field_required: -# if isinstance(inp, str): # name of the input field -# if not hasattr(inputs, inp): -# raise Exception( -# f"{inp} is not a valid input field, can't be used in requires" -# ) -# elif getattr(inputs, inp) in [attrs.NOTHING, None]: -# required_found = False -# break -# elif isinstance(inp, tuple): # (name, allowed values) -# inp, allowed_val = inp[0], ensure_list(inp[1]) -# if not hasattr(inputs, inp): -# raise Exception( -# f"{inp} is not a valid input field, can't be used in requires" -# ) -# elif getattr(inputs, inp) not in allowed_val: -# required_found = False -# break -# else: -# raise Exception( -# f"each element of the requires element should be a string or a tuple, " -# f"but {inp} is found in 
{field_required}" -# ) -# # if the specific list from field_required_OR has all elements set, no need to check more -# if required_found: -# break - -# if required_found: -# return True -# else: -# return False - - -# @attrs.define -# class LazyInterface: -# _task: "core.Task" = attrs.field() -# _attr_type: str - -# def __getattr__(self, name): -# if name in ("_task", "_attr_type", "_field_names"): -# raise AttributeError(f"{name} hasn't been set yet") -# if name not in self._field_names: -# raise AttributeError( -# f"Task '{self._task.name}' has no {self._attr_type} attribute '{name}', " -# "available: '" + "', '".join(self._field_names) + "'" -# ) -# type_ = self._get_type(name) -# splits = self._get_task_splits() -# combines = self._get_task_combines() -# if combines and self._attr_type == "output": -# # Add in any scalar splits referencing upstream splits, i.e. "_myupstreamtask", -# # "_myarbitrarytask" -# combined_upstreams = set() -# if self._task.state: -# for scalar in LazyField.normalize_splitter( -# self._task.state.splitter, strip_previous=False -# ): -# for field in scalar: -# if field.startswith("_"): -# node_name = field[1:] -# if any(c.split(".")[0] == node_name for c in combines): -# combines.update( -# f for f in scalar if not f.startswith("_") -# ) -# combined_upstreams.update( -# f[1:] for f in scalar if f.startswith("_") -# ) -# if combines: -# # Wrap type in list which holds the combined items -# type_ = ty.List[type_] -# # Iterate through splits to remove any splits which are removed by the -# # combiner -# for splitter in copy(splits): -# remaining = tuple( -# s -# for s in splitter -# if not any( -# (x in combines or x.split(".")[0] in combined_upstreams) -# for x in s -# ) -# ) -# if remaining != splitter: -# splits.remove(splitter) -# if remaining: -# splits.add(remaining) -# # Wrap the type in a nested StateArray type -# if splits: -# type_ = StateArray[type_] -# lf_klass = LazyInField if self._attr_type == "input" else LazyOutField -# return lf_klass[type_]( -# name=self._task.name, -# field=name, -# type=type_, -# splits=splits, -# ) - -# def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: -# """Returns the states over which the inputs of the task are split""" -# splitter = self._task.state.splitter if self._task.state else None -# splits = set() -# if splitter: -# # Ensure that splits is of tuple[tuple[str, ...], ...] 
form -# splitter = LazyField.normalize_splitter(splitter) -# if splitter: -# splits.add(splitter) -# for inpt in attrs.asdict(self._task.inputs, recurse=False).values(): -# if isinstance(inpt, LazyField): -# splits.update(inpt.splits) -# return splits - -# def _get_task_combines(self) -> ty.Set[ty.Union[str, ty.Tuple[str, ...]]]: -# """Returns the states over which the outputs of the task are combined""" -# combiner = ( -# self._task.state.combiner -# if self._task.state is not None -# else getattr(self._task, "fut_combiner", None) -# ) -# return set(combiner) if combiner else set() - - -# class LazyIn(LazyInterface): -# _attr_type = "input" - -# def _get_type(self, name): -# attr = next(t for n, t in self._task.input_spec.fields if n == name) -# if attr is None: -# return ty.Any -# elif inspect.isclass(attr): -# return attr -# else: -# return attr.type - -# @property -# def _field_names(self): -# return [field[0] for field in self._task.input_spec.fields] - - -# class LazyOut(LazyInterface): -# _attr_type = "output" - -# def _get_type(self, name): -# try: -# type_ = next(f[1] for f in self._task.output_spec.fields if f[0] == name) -# except StopIteration: -# type_ = ty.Any -# else: -# if not inspect.isclass(type_): -# try: -# type_ = type_.type # attrs _CountingAttribute -# except AttributeError: -# pass # typing._SpecialForm -# return type_ - -# @property -# def _field_names(self): -# return self._task.output_names + ["all_"] - - -def donothing(*args, **kwargs): - return None - - -@attrs.define(auto_attribs=True, kw_only=True) -class TaskHook: - """Callable task hooks.""" - - pre_run_task: ty.Callable = donothing - post_run_task: ty.Callable = donothing - pre_run: ty.Callable = donothing - post_run: ty.Callable = donothing - - def __setattr__(self, attr, val): - if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - raise AttributeError("Cannot set unknown hook") - super().__setattr__(attr, val) - - def reset(self): - for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - setattr(self, val, donothing) - - -def path_to_string(value): - """Convert paths to strings.""" - if isinstance(value, Path): - value = str(value) - elif isinstance(value, list) and len(value) and isinstance(value[0], Path): - value = [str(val) for val in value] - return value +import attrs +from fileformats.generic import File, FileSet, Directory +from pydra.engine.audit import AuditFlag +from pydra.utils.typing import MultiOutputObj, MultiOutputFile +from .helpers import attr_fields, is_lazy +from .helpers_file import template_update_single +from pydra.utils.hash import hash_function, Cache -class OutputsSpec: +class OutSpec: """Base class for all output specifications""" def split( @@ -865,14 +82,17 @@ def combine( return self -OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) +OutSpecType = ty.TypeVar("OutputType", bound=OutSpec) -class TaskSpec(ty.Generic[OutputType]): +class TaskSpec(ty.Generic[OutSpecType]): """Base class for all task specifications""" Task: "ty.Type[core.Task]" + def __attrs_post_init__(self): + self._check_rules() + def __call__( self, name: str | None = None, @@ -912,5 +132,498 @@ def _check_for_unset_values(self): "before the workflow can be constructed" ) + @property + def hash(self): + hsh, self._hashes = self._compute_hashes() + return hsh + + def _hash_changes(self): + """Detects any changes in the hashed values between the current inputs and the + previously calculated values""" + _, new_hashes = self._compute_hashes() + return [k for k, v in 
new_hashes.items() if v != self._hashes[k]] + + def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: + """Compute a basic hash for any given set of fields.""" + inp_dict = {} + for field in attr_fields( + self, exclude_names=("_graph_checksums", "bindings", "files_hash") + ): + if field.metadata.get("output_file_template"): + continue + # removing values that are not set from hash calculation + if getattr(self, field.name) is attrs.NOTHING: + continue + if "container_path" in field.metadata: + continue + inp_dict[field.name] = getattr(self, field.name) + hash_cache = Cache() + field_hashes = { + k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() + } + if hasattr(self, "_graph_checksums"): + field_hashes["_graph_checksums"] = self._graph_checksums + return hash_function(sorted(field_hashes.items())), field_hashes + + def _retrieve_values(self, wf, state_index=None): + """Parse output results.""" + temp_values = {} + for field in attr_fields(self): + # retrieving values that do not have templates + if not field.metadata.get("output_file_template"): + value = getattr(self, field.name) + if is_lazy(value): + temp_values[field.name] = value.get_value( + wf, state_index=state_index + ) + for field, val in temp_values.items(): + value = path_to_string(value) + setattr(self, field, val) + + def _check_rules(self): + fields = attr_fields(self) + + for field in fields: + field_is_mandatory = bool(field.metadata.get("mandatory")) + field_is_unset = getattr(self, field.name) is attrs.NOTHING + + if field_is_unset and not field_is_mandatory: + continue + + # Collect alternative fields associated with this field. + alternative_fields = { + name: getattr(self, name) is not attrs.NOTHING + for name in field.metadata.get("xor", []) + if name != field.name + } + alternatives_are_set = any(alternative_fields.values()) + + # Raise error if no field in mandatory alternative group is set. + if field_is_unset: + if alternatives_are_set: + continue + message = f"{field.name} is mandatory and unset." + if alternative_fields: + raise AttributeError( + message[:-1] + + f", but no alternative provided by {list(alternative_fields)}." + ) + else: + raise AttributeError(message) + + # Raise error if multiple alternatives are set. + elif alternatives_are_set: + set_alternative_fields = [ + name for name, is_set in alternative_fields.items() if is_set + ] + raise AttributeError( + f"{field.name} is mutually exclusive with {set_alternative_fields}" + ) + + # Collect required fields associated with this field. + required_fields = { + name: getattr(self, name) is not attrs.NOTHING + for name in field.metadata.get("requires", []) + if name != field.name + } + + # Raise error if any required field is unset. 
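As a rough, hypothetical illustration of the checks above (field names are invented for the example; "unset" means the value is still attrs.NOTHING):

    #   in_file:   attrs.field(metadata={"mandatory": True})
    #   mask:      attrs.field(metadata={"xor": ["threshold"]})
    #   threshold: attrs.field(metadata={"requires": ["in_file"]})
    # leaving in_file unset would raise "in_file is mandatory and unset.";
    # supplying both mask and threshold would raise
    # "mask is mutually exclusive with ['threshold']";
    # supplying threshold without in_file would raise "threshold requires ['in_file']".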
+ if not all(required_fields.values()): + unset_required_fields = [ + name for name, is_set in required_fields.items() if not is_set + ] + raise AttributeError(f"{field.name} requires {unset_required_fields}") + + +@attrs.define(kw_only=True) +class Runtime: + """Represent run time metadata.""" + + rss_peak_gb: ty.Optional[float] = None + """Peak in consumption of physical RAM.""" + vms_peak_gb: ty.Optional[float] = None + """Peak in consumption of virtual memory.""" + cpu_peak_percent: ty.Optional[float] = None + """Peak in cpu consumption.""" + + +@attrs.define(kw_only=True) +class Result: + """Metadata regarding the outputs of processing.""" + + output: ty.Optional[ty.Any] = None + runtime: ty.Optional[Runtime] = None + errored: bool = False + + def __getstate__(self): + state = self.__dict__.copy() + if state["output"] is not None: + fields = tuple((el.name, el.type) for el in attr_fields(state["output"])) + state["output_spec"] = (state["output"].__class__.__name__, fields) + state["output"] = attrs.asdict(state["output"], recurse=False) + return state + + def __setstate__(self, state): + if "output_spec" in state: + spec = list(state["output_spec"]) + del state["output_spec"] + klass = attrs.make_class( + spec[0], {k: attrs.field(type=v) for k, v in list(spec[1])} + ) + state["output"] = klass(**state["output"]) + self.__dict__.update(state) + + def get_output_field(self, field_name): + """Used in get_values in Workflow + + Parameters + ---------- + field_name : `str` + Name of field in LazyField object + """ + if field_name == "all_": + return attrs.asdict(self.output, recurse=False) + else: + return getattr(self.output, field_name) + + +@attrs.define(kw_only=True) +class RuntimeSpec: + """ + Specification for a task. + + From CWL:: + + InlineJavascriptRequirement + SchemaDefRequirement + DockerRequirement + SoftwareRequirement + InitialWorkDirRequirement + EnvVarRequirement + ShellCommandRequirement + ResourceRequirement + + InlineScriptRequirement + + """ + + outdir: ty.Optional[str] = None + container: ty.Optional[str] = "shell" + network: bool = False + + +class PythonOutSpec(OutSpec): + pass + + +class PythonSpec(TaskSpec): + pass + + +class WorkflowOutSpec(OutSpec): + pass + + +class WorkflowSpec(TaskSpec): + pass + + +@attrs.define(kw_only=True) +class ShellOutSpec(OutSpec): + """Output specification of a generic shell process.""" + + return_code: int + """The process' exit code.""" + stdout: str + """The process' standard output.""" + stderr: str + """The process' standard input.""" + + def _collect_additional_outputs(self, inputs, output_dir, outputs): + from ..utils.typing import TypeParser + + """Collect additional outputs from shelltask output_spec.""" + additional_out = {} + for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): + if not TypeParser.is_subclass( + fld.type, + ( + os.PathLike, + MultiOutputObj, + int, + float, + bool, + str, + list, + ), + ): + raise TypeError( + f"Support for {fld.type} type, required for '{fld.name}' in {self}, " + "has not been implemented in collect_additional_output" + ) + # assuming that field should have either default or metadata, but not both + input_value = getattr(inputs, fld.name, attrs.NOTHING) + if input_value is not attrs.NOTHING: + if TypeParser.contains_type(FileSet, fld.type): + if input_value is not False: + label = f"output field '{fld.name}' of {self}" + input_value = TypeParser(fld.type, label=label).coerce( + input_value + ) + additional_out[fld.name] = input_value + elif ( + fld.default is 
None or fld.default == attrs.NOTHING + ) and not fld.metadata: # TODO: is it right? + raise AttributeError("File has to have default value or metadata") + elif fld.default != attrs.NOTHING: + additional_out[fld.name] = self._field_defaultvalue(fld, output_dir) + elif fld.metadata: + if ( + fld.type in [int, float, bool, str, list] + and "callable" not in fld.metadata + ): + raise AttributeError( + f"{fld.type} has to have a callable in metadata" + ) + additional_out[fld.name] = self._field_metadata( + fld, inputs, output_dir, outputs + ) + return additional_out + + def _generated_output_names(self, inputs, output_dir): + """Returns a list of all outputs that will be generated by the task. + Takes into account the task input and the requires list for the output fields. + TODO: should be in all Output specs? + """ + # checking the input (if all mandatory fields are provided, etc.) + inputs.check_fields_input_spec() + output_names = ["return_code", "stdout", "stderr"] + for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): + if fld.type not in [File, MultiOutputFile, Directory]: + raise Exception("not implemented (collect_additional_output)") + # assuming that field should have either default or metadata, but not both + if ( + fld.default in (None, attrs.NOTHING) and not fld.metadata + ): # TODO: is it right? + raise AttributeError("File has to have default value or metadata") + elif fld.default != attrs.NOTHING: + output_names.append(fld.name) + elif ( + fld.metadata + and self._field_metadata( + fld, inputs, output_dir, outputs=None, check_existance=False + ) + != attrs.NOTHING + ): + output_names.append(fld.name) + return output_names + + def _field_defaultvalue(self, fld, output_dir): + """Collect output file if the default value specified.""" + if not isinstance(fld.default, (str, Path)): + raise AttributeError( + f"{fld.name} is a File, so default value " + f"should be a string or a Path, " + f"{fld.default} provided" + ) + default = fld.default + if isinstance(default, str): + default = Path(default) + + default = output_dir / default + if "*" not in str(default): + if default.exists(): + return default + else: + raise AttributeError(f"file {default} does not exist") + else: + all_files = [Path(el) for el in glob(str(default.expanduser()))] + if len(all_files) > 1: + return all_files + elif len(all_files) == 1: + return all_files[0] + else: + raise AttributeError(f"no file matches {default.name}") + + def _field_metadata( + self, fld, inputs, output_dir, outputs=None, check_existance=True + ): + """Collect output file if metadata specified.""" + if self._check_requires(fld, inputs) is False: + return attrs.NOTHING + + if "value" in fld.metadata: + return output_dir / fld.metadata["value"] + # this block is only run if "output_file_template" is provided in output_spec + # if the field is set in input_spec with output_file_template, + # than the field already should have value + elif "output_file_template" in fld.metadata: + value = template_update_single( + fld, inputs=inputs, output_dir=output_dir, spec_type="output" + ) + + if fld.type is MultiOutputFile and type(value) is list: + # TODO: how to deal with mandatory list outputs + ret = [] + for val in value: + val = Path(val) + if check_existance and not val.exists(): + ret.append(attrs.NOTHING) + else: + ret.append(val) + return ret + else: + val = Path(value) + # checking if the file exists + if check_existance and not val.exists(): + # if mandatory raise exception + if "mandatory" in fld.metadata: + if 
fld.metadata["mandatory"]: + raise Exception( + f"mandatory output for variable {fld.name} does not exist" + ) + return attrs.NOTHING + return val + elif "callable" in fld.metadata: + callable_ = fld.metadata["callable"] + if isinstance(callable_, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. + callable_ = callable_.__func__ + call_args = inspect.getfullargspec(callable_) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = fld + elif argnm == "output_dir": + call_args_val[argnm] = output_dir + elif argnm == "inputs": + call_args_val[argnm] = inputs + elif argnm == "stdout": + call_args_val[argnm] = outputs["stdout"] + elif argnm == "stderr": + call_args_val[argnm] = outputs["stderr"] + else: + try: + call_args_val[argnm] = getattr(inputs, argnm) + except AttributeError: + raise AttributeError( + f"arguments of the callable function from {fld.name} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm} is used" + ) + return callable_(**call_args_val) + else: + raise Exception( + f"Metadata for '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld.metadata}.' + ) + + def _check_requires(self, fld, inputs): + """checking if all fields from the requires and template are set in the input + if requires is a list of list, checking if at least one list has all elements set + """ + from .helpers import ensure_list + + if "requires" in fld.metadata: + # if requires is a list of list it is treated as el[0] OR el[1] OR... + required_fields = ensure_list(fld.metadata["requires"]) + if all([isinstance(el, list) for el in required_fields]): + field_required_OR = required_fields + # if requires is a list of tuples/strings - I'm creating a 1-el nested list + elif all([isinstance(el, (str, tuple)) for el in required_fields]): + field_required_OR = [required_fields] + else: + raise Exception( + f"requires field can be a list of list, or a list " + f"of strings/tuples, but {fld.metadata['requires']} " + f"provided for {fld.name}" + ) + else: + field_required_OR = [[]] + + for field_required in field_required_OR: + # if the output has output_file_template field, + # adding all input fields from the template to requires + if self.path_template: + # if a template is a function it has to be run first with the inputs as the only arg + if callable(self.path_template): + template = self.path_template(inputs) + inp_fields = re.findall(r"{(\w+)(?:\:[^\}]+)?}", template) + field_required += [ + el[1:-1] for el in inp_fields if el[1:-1] not in field_required + ] + + # it's a flag, of the field from the list is not in input it will be changed to False + required_found = True + for field_required in field_required_OR: + required_found = True + # checking if the input fields from requires have set values + for inp in field_required: + if isinstance(inp, str): # name of the input field + if not hasattr(inputs, inp): + raise Exception( + f"{inp} is not a valid input field, can't be used in requires" + ) + elif getattr(inputs, inp) in [attrs.NOTHING, None]: + required_found = False + break + elif isinstance(inp, tuple): # (name, allowed values) + inp, allowed_val = inp[0], ensure_list(inp[1]) + if not hasattr(inputs, inp): + raise Exception( + f"{inp} is not a valid input field, can't be used in requires" + ) + elif getattr(inputs, inp) not in allowed_val: + required_found = False + break + else: + raise Exception( + f"each 
element of the requires element should be a string or a tuple, " + f"but {inp} is found in {field_required}" + ) + # if the specific list from field_required_OR has all elements set, no need to check more + if required_found: + break + + if not required_found: + raise ValueError("Did not find all required fields in the input") + + +class ShellSpec(TaskSpec): + pass + + +def donothing(*args, **kwargs): + return None + + +@attrs.define(kw_only=True) +class TaskHook: + """Callable task hooks.""" + + pre_run_task: ty.Callable = donothing + post_run_task: ty.Callable = donothing + pre_run: ty.Callable = donothing + post_run: ty.Callable = donothing + + def __setattr__(self, attr, val): + if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + raise AttributeError("Cannot set unknown hook") + super().__setattr__(attr, val) + + def reset(self): + for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + setattr(self, val, donothing) + + +def path_to_string(value): + """Convert paths to strings.""" + if isinstance(value, Path): + value = str(value) + elif isinstance(value, list) and len(value) and isinstance(value[0], Path): + value = [str(val) for val in value] + return value + from pydra.engine import core # noqa: E402 diff --git a/pydra/engine/task.py b/pydra/engine/task.py index 68731f47bc..f2d3a2283e 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -45,28 +45,22 @@ import re import attr import attrs -import warnings import inspect import typing as ty import shlex from pathlib import Path import cloudpickle as cp -from fileformats.core import FileSet, DataType +from fileformats.core import FileSet from .core import Task, is_lazy from pydra.utils.messenger import AuditFlag from .specs import ( - # BaseSpec, - # SpecInfo, - # ShellSpec, - # ShellOutSpec, - TaskSpec, + ShellSpec, attr_fields, ) from .helpers import ( parse_format_string, position_sort, ensure_list, - # output_from_inputfields, parse_copyfile, ) from .helpers_file import template_update @@ -77,124 +71,6 @@ class FunctionTask(Task): """Wrap a Python callable as a task element.""" - def __init__( - self, - spec: TaskSpec, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - cont_dim=None, - messenger_args=None, - messengers=None, - name=None, - rerun=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - func : :obj:`callable` - A Python executable function. - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - cache_locations : :obj:`list` of :obj:`os.pathlike` - List of alternative cache locations. - input_spec : :obj:`pydra.engine.specs.SpecInfo` - Specification of inputs. - cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO - name : :obj:`str` - Name of this task. - output_spec : :obj:`pydra.engine.specs.BaseSpec` - Specification of inputs. 
- - """ - if input_spec is None: - fields = [] - for val in inspect.signature(func).parameters.values(): - if val.default is not inspect.Signature.empty: - val_dflt = val.default - else: - val_dflt = attr.NOTHING - if isinstance(val.annotation, ty.TypeVar): - raise NotImplementedError( - "Template types are not currently supported in task signatures " - f"(found in '{val.name}' field of '{name}' task), " - "see https://github.com/nipype/pydra/issues/672" - ) - fields.append( - ( - val.name, - attr.ib( - default=val_dflt, - type=val.annotation, - metadata={ - "help_string": f"{val.name} parameter from {func.__name__}" - }, - ), - ) - ) - fields.append(("_func", attr.ib(default=cp.dumps(func), type=bytes))) - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) - else: - input_spec.fields.append( - ("_func", attr.ib(default=cp.dumps(func), type=bytes)) - ) - self.input_spec = input_spec - if name is None: - name = func.__name__ - super().__init__( - name, - inputs=kwargs, - cont_dim=cont_dim, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=cache_dir, - cache_locations=cache_locations, - rerun=rerun, - ) - if output_spec is None: - name = "Output" - fields = [("out", ty.Any)] - if "return" in func.__annotations__: - return_info = func.__annotations__["return"] - # # e.g. python annotation: fun() -> ty.NamedTuple("Output", [("out", float)]) - # # or pydra decorator: @pydra.mark.annotate({"return": ty.NamedTuple(...)}) - # - - if ( - hasattr(return_info, "__name__") - and getattr(return_info, "__annotations__", None) - and not issubclass(return_info, DataType) - ): - name = return_info.__name__ - fields = list(return_info.__annotations__.items()) - # e.g. python annotation: fun() -> {"out": int} - # or pydra decorator: @pydra.mark.annotate({"return": {"out": int}}) - elif isinstance(return_info, dict): - fields = list(return_info.items()) - # e.g. python annotation: fun() -> (int, int) - # or pydra decorator: @pydra.mark.annotate({"return": (int, int)}) - elif isinstance(return_info, tuple): - fields = [(f"out{i}", t) for i, t in enumerate(return_info, 1)] - # e.g. python annotation: fun() -> int - # or pydra decorator: @pydra.mark.annotate({"return": int}) - else: - fields = [("out", return_info)] - output_spec = SpecInfo(name=name, fields=fields, bases=(BaseSpec,)) - - self.output_spec = output_spec - def _run_task(self, environment=None): inputs = attr.asdict(self.inputs, recurse=False) del inputs["_func"] @@ -220,12 +96,9 @@ def _run_task(self, environment=None): class ShellCommandTask(Task): """Wrap a shell command as a task element.""" - input_spec = None - output_spec = None - def __init__( self, - spec: TaskSpec, + spec: ShellSpec, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cont_dim=None, @@ -261,36 +134,7 @@ def __init__( Specification of inputs. 
strip : :obj:`bool` TODO - """ - - # using default name for task if no name provided - if name is None: - name = "ShellTask_noname" - - # # using provided spec, class attribute or setting the default SpecInfo - # self.input_spec = ( - # input_spec - # or self.input_spec - # or SpecInfo(name="Inputs", fields=[], bases=(ShellSpec,)) - # ) - # self.output_spec = ( - # output_spec - # or self.output_spec - # or SpecInfo(name="Output", fields=[], bases=(ShellOutSpec,)) - # ) - # self.output_spec = output_from_inputfields(self.output_spec, self.input_spec) - - for special_inp in ["executable", "args"]: - if hasattr(self, special_inp): - if special_inp not in kwargs: - kwargs[special_inp] = getattr(self, special_inp) - elif kwargs[special_inp] != getattr(self, special_inp): - warnings.warn( - f"you are changing the executable from {getattr(self, special_inp)} " - f"to {kwargs[special_inp]}" - ) - super().__init__( name=name, inputs=kwargs, diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index cbfbe6d1c2..11a8434c22 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -4,18 +4,19 @@ from typing_extensions import Self import attrs from pydra.engine.helpers import list_fields -from pydra.engine.specs import TaskSpec, OutputsSpec +from pydra.engine.specs import TaskSpec, OutSpec, WorkflowOutSpec from .lazy import LazyInField from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from .node import Node -OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) +OutSpecType = ty.TypeVar("OutputType", bound=OutSpec) +WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutSpec) @attrs.define(auto_attribs=False) -class Workflow(ty.Generic[OutputType]): +class Workflow(ty.Generic[WorkflowOutSpecType]): """A workflow, constructed from a workflow specification Parameters @@ -29,14 +30,14 @@ class Workflow(ty.Generic[OutputType]): """ name: str = attrs.field() - inputs: TaskSpec[OutputType] = attrs.field() - outputs: OutputType = attrs.field() + inputs: TaskSpec[WorkflowOutSpecType] = attrs.field() + outputs: WorkflowOutSpecType = attrs.field() _nodes: dict[str, Node] = attrs.field(factory=dict) @classmethod def construct( cls, - spec: TaskSpec[OutputType], + spec: TaskSpec[WorkflowOutSpecType], ) -> Self: """Construct a workflow from a specification, caching the constructed worklow""" @@ -104,11 +105,9 @@ def construct( f"{len(output_lazy_fields)} ({output_lazy_fields})" ) for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + # Automatically combine any uncombined state arrays into lists if TypeParser.get_origin(outpt_lf.type) is StateArray: - # Automatically combine any uncombined state arrays into lists - tp, _ = TypeParser.strip_splits(outpt_lf.type) - outpt_lf.type = list[tp] - outpt_lf.splits = frozenset() + outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] setattr(outputs, outpt.name, outpt_lf) else: if unset_outputs := [ @@ -127,7 +126,7 @@ def construct( return wf - def add(self, task_spec: TaskSpec[OutputType], name=None) -> OutputType: + def add(self, task_spec: TaskSpec[OutSpecType], name=None) -> OutSpecType: """Add a node to the workflow Parameters @@ -147,7 +146,7 @@ def add(self, task_spec: TaskSpec[OutputType], name=None) -> OutputType: name = type(task_spec).__name__ if name in self._nodes: raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutputType](name=name, spec=task_spec, workflow=self) + node = 
Node[OutSpecType](name=name, spec=task_spec, workflow=self) self._nodes[name] = node return node.lzout diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 7f5b32972a..197d3ca32d 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -4,7 +4,7 @@ import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy -from ..specs import TaskSpec, OutputsSpec +from ..specs import TaskSpec, OutSpec from ..helpers import ensure_list from .. import helpers_state as hlpst from ..state import State @@ -13,7 +13,7 @@ from .base import Workflow -OutputType = ty.TypeVar("OutputType", bound=OutputsSpec) +OutputType = ty.TypeVar("OutputType", bound=OutSpec) Splitter = ty.Union[str, ty.Tuple[str, ...]] _not_set = Enum("_not_set", "NOT_SET") @@ -43,8 +43,8 @@ class Node(ty.Generic[OutputType]): _cont_dim: dict[str, int] | None = attrs.field( init=False, default=None ) # QUESTION: should this be included in the state? - _inner_cont_dim: dict[str, int] | None = attrs.field( - init=False, default=None + _inner_cont_dim: dict[str, int] = attrs.field( + init=False, factory=dict ) # QUESTION: should this be included in the state? class Inputs: @@ -247,7 +247,9 @@ def combine( if not isinstance(combiner, (str, list)): raise Exception("combiner has to be a string or a list") combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) - if not_split := [c for c in combiner if not any(c in s for s in self.splitter)]: + if not_split := [ + c for c in combiner if not any(c in s for s in self.state.splitter) + ]: raise ValueError( f"Combiner fields {not_split} for Node {self.name!r} are not in the " f"splitter fields {self.splitter}" @@ -343,7 +345,14 @@ def _wrap_lzout_types_in_state_arrays(self) -> None: if not self.state: return outpt_lf: lazy.LazyOutField - state_depth = len(self.state.splitter_rpn) + remaining_splits = [] + for split in self.state.splitter: + if isinstance(split, str): + if split not in self.state.combiner: + remaining_splits.append(split) + elif all(s not in self.state.combiner for s in split): + remaining_splits.append(split) + state_depth = len(remaining_splits) for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): assert not outpt_lf.type_checked type_, _ = TypeParser.strip_splits(outpt_lf.type) diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 58249ddbfd..ee21d26db3 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -10,6 +10,7 @@ import attr from pydra.utils import add_exc_note from fileformats import field, core, generic +from pydra.engine.helpers import is_lazy try: from typing import get_origin, get_args @@ -213,12 +214,11 @@ def __call__(self, obj: ty.Any) -> T: if the coercion is not possible, or not specified by the `coercible`/`not_coercible` parameters, then a TypeError is raised """ - from pydra.engine.workflow.lazy import LazyField coerced: T if obj is attr.NOTHING: coerced = attr.NOTHING # type: ignore[assignment] - elif isinstance(obj, LazyField): + elif is_lazy(obj): try: self.check_type(obj.type) except TypeError as e: From bdee4c8556a2db25e82556184bf5719f3761331a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 12:35:24 +1100 Subject: [PATCH 051/342] cleaned up specs.py so that it works with new syntax --- pydra/design/base.py | 33 ++-- pydra/design/shell.py | 3 +- pydra/engine/audit.py | 8 +- pydra/engine/boutiques.py | 4 +- pydra/engine/core.py | 15 +- pydra/engine/helpers.py | 49 ++--- pydra/engine/helpers_file.py | 10 +- 
pydra/engine/helpers_state.py | 7 +- pydra/engine/specs.py | 332 +++++++++++++--------------------- pydra/engine/state.py | 4 +- pydra/engine/task.py | 16 +- pydra/engine/workflow/base.py | 14 +- pydra/engine/workflow/node.py | 8 +- 13 files changed, 204 insertions(+), 299 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 3f959358af..c54b8f31c8 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -36,8 +36,6 @@ "make_task_spec", ] -RESERVED_OUTPUT_NAMES = ("split", "combine") - class _Empty(enum.Enum): @@ -58,6 +56,11 @@ def is_type(_, __, val: ty.Any) -> bool: return inspect.isclass(val) or ty.get_origin(val) +def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: + """Ensure the default value has been coerced into the correct type""" + return TypeParser[self_.type](self_.type, label=self_.name)(value) + + @attrs.define(kw_only=True) class Field: """Base class for input and output fields to task specifications @@ -66,9 +69,11 @@ class Field: ---------- name: str, optional The name of the field, used when specifying a list of fields instead of a mapping - from name to field, by default it is None type: type, optional The type of the field, by default it is Any + from name to field, by default it is None + default : Any, optional + the default value for the field, by default it is EMPTY help_string: str, optional A short description of the input field. requires: list, optional @@ -83,6 +88,9 @@ class Field: type: ty.Type[ty.Any] = attrs.field( validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) ) + default: ty.Any = attrs.field( + default=EMPTY, converter=attrs.Converter(convert_default_value, with_self=True) + ) help_string: str = "" requires: list[str] | list[list[str]] = attrs.field( factory=list, converter=ensure_list @@ -97,10 +105,15 @@ class Arg(Field): Parameters ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is EMPTY help_string: str A short description of the input field. - default : Any, optional - the default value for the argument allowed_values: list, optional List of allowed values for the field. requires: list, optional @@ -118,14 +131,8 @@ class Arg(Field): If True the input field can’t be provided by the user but it aggregates other input fields (for example the fields with argstr: -o {fldA} {fldB}), by default it is False - type: type, optional - The type of the field, by default it is Any - name: str, optional - The name of the field, used when specifying a list of fields instead of a mapping - from name to field, by default it is None """ - default: ty.Any = EMPTY allowed_values: list | None = None xor: list | None = None copy_mode: File.CopyMode = File.CopyMode.any @@ -145,6 +152,8 @@ class Out(Field): from name to field, by default it is None type: type, optional The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is EMPTY help_string: str, optional A short description of the input field. 
requires: list, optional @@ -385,7 +394,7 @@ def make_outputs_spec( f"Cannot make {spec_type} output spec from {out_spec_bases} bases" ) outputs_bases = bases + (spec_type,) - if reserved_names := [n for n in outputs if n in RESERVED_OUTPUT_NAMES]: + if reserved_names := [n for n in outputs if n in spec_type.RESERVED_FIELD_NAMES]: raise ValueError( f"{reserved_names} are reserved and cannot be used for output field names" ) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 6587608960..d6191943d9 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -12,6 +12,7 @@ from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError from pydra.engine.specs import ShellSpec, ShellOutSpec +from pydra.engine.helpers import attrs_values from .base import ( Arg, Out, @@ -470,7 +471,7 @@ def add_arg(name, field_type, kwds, is_option=False): kwds["type"] = field field = field_type(name=name, **kwds) elif not isinstance(field, field_type): # If field type is outarg not out - field = field_type(**attrs.asdict(field, recurse=False)) + field = field_type(**attrs_values(field)) field.name = name type_ = kwds.pop("type", field.type) if field.type is ty.Any: diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 8d5695e4e4..6f8d2dd8c4 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -2,8 +2,8 @@ import os import json -import attr from pydra.utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag +from pydra.engine.helpers import attrs_values from fileformats.core import FileSet from pydra.utils.hash import hash_function @@ -104,7 +104,7 @@ def finalize_audit(self, result): ) # audit resources/runtime information self.eid = f"uid:{gen_uuid()}" - entity = attr.asdict(result.runtime, recurse=False) + entity = attrs_values(result.runtime) entity.update( **{ "@id": self.eid, @@ -180,12 +180,12 @@ def audit_check(self, flag): def audit_task(self, task): import subprocess as sp - from .helpers import attr_fields + from .helpers import attrs_fields label = task.name command = task.cmdline if hasattr(task.inputs, "executable") else None - attr_list = attr_fields(task.inputs) + attr_list = attrs_fields(task.inputs) for attrs in attr_list: input_name = attrs.name value = getattr(task.inputs, input_name) diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py index 65ca0b727e..8202da6b99 100644 --- a/pydra/engine/boutiques.py +++ b/pydra/engine/boutiques.py @@ -7,7 +7,7 @@ from pydra.utils.messenger import AuditFlag from pydra.engine.task import ShellCommandTask -from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attr_fields +from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attrs_fields from .helpers_file import is_local_file @@ -192,7 +192,7 @@ def _command_args_single(self, state_ind=None, index=None): def _bosh_invocation_file(self, state_ind=None, index=None): """creating bosh invocation file - json file with inputs values""" input_json = {} - for f in attr_fields(self.inputs, exclude_names=("executable", "args")): + for f in attrs_fields(self.inputs, exclude_names=("executable", "args")): if self.state and f"{self.name}.{f.name}" in state_ind: value = getattr(self.inputs, f.name)[state_ind[f"{self.name}.{f.name}"]] else: diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 7cf35f455f..f9c833ecbf 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -26,7 +26,8 @@ from .workflow.lazy import is_lazy from .helpers import ( create_checksum, - 
attr_fields, + attrs_fields, + attrs_values, print_help, load_result, save, @@ -342,7 +343,7 @@ def generated_output_names(self): The results depends on the input provided to the task """ output_klass = self.interface.Outputs - if hasattr(output_klass, "generated_output_names"): + if hasattr(output_klass, "_generated_output_names"): output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) @@ -352,7 +353,7 @@ def generated_output_names(self): if modified_inputs: _inputs = attr.evolve(_inputs, **modified_inputs) - return output.generated_output_names( + return output._generated_output_names( inputs=_inputs, output_dir=self.output_dir ) else: @@ -461,9 +462,7 @@ def _modify_inputs(self): from pydra.utils.typing import TypeParser orig_inputs = { - k: v - for k, v in attr.asdict(self.inputs, recurse=False).items() - if not k.startswith("_") + k: v for k, v in attrs_values(self.inputs).items() if not k.startswith("_") } map_copyfiles = {} input_fields = attr.fields(type(self.inputs)) @@ -754,7 +753,7 @@ def result(self, state_index=None, return_inputs=False): def _reset(self): """Reset the connections between inputs and LazyFields.""" - for field in attr_fields(self.inputs): + for field in attrs_fields(self.inputs): if field.name in self.inp_lf: setattr(self.inputs, field.name, self.inp_lf[field.name]) if is_workflow(self): @@ -979,7 +978,7 @@ def create_connections(self, task, detailed=False): """ # TODO: create connection is run twice other_states = {} - for field in attr_fields(task.inputs): + for field in attrs_fields(task.inputs): val = getattr(task.inputs, field.name) if is_lazy(val): # saving all connections with LazyFields diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index dc85205521..e2e69e4c06 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -26,11 +26,18 @@ PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" -def attr_fields(spec, exclude_names=()): +def attrs_fields(spec, exclude_names=()) -> list[attrs.Attribute]: + """Get the fields of a spec, excluding some names.""" return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] +def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: + """Get the values of an attrs object.""" + return attrs.asdict(obj, recurse=False, **kwargs) + + def list_fields(interface: "TaskSpec") -> list["Field"]: + """List the fields of a task specification""" if not attrs.has(interface): return [] return [ @@ -43,7 +50,7 @@ def list_fields(interface: "TaskSpec") -> list["Field"]: # from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile -def from_list_if_single(obj): +def from_list_if_single(obj: ty.Any) -> ty.Any: """Converts a list to a single item if it is of length == 1""" if obj is attrs.NOTHING: @@ -109,7 +116,7 @@ def load_result(checksum, cache_locations): return None -def save(task_path: Path, result=None, task=None, name_prefix=None): +def save(task_path: Path, result=None, task=None, name_prefix=None) -> None: """ Save a :class:`~pydra.engine.core.TaskBase` object and/or results. 
@@ -147,7 +154,7 @@ def save(task_path: Path, result=None, task=None, name_prefix=None): def copyfile_workflow(wf_path: os.PathLike, result): """if file in the wf results, the file will be copied to the workflow directory""" - for field in attr_fields(result.output): + for field in attrs_fields(result.output): value = getattr(result.output, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run # to move all files and directories to the workflow directory @@ -375,38 +382,6 @@ def get_open_loop(): return loop -# def output_from_inputfields(interface: "Interface"): -# """ -# Collect values from output from input fields. -# If names_only is False, the output_spec is updated, -# if names_only is True only the names are returned - -# Parameters -# ---------- -# output_spec : -# TODO -# input_spec : -# TODO - -# """ -# current_output_spec_names = [f.name for f in attrs.fields(interface.Outputs)] -# new_fields = [] -# for fld in attrs.fields(interface): -# if "output_file_template" in fld.metadata: -# if "output_field_name" in fld.metadata: -# field_name = fld.metadata["output_field_name"] -# else: -# field_name = fld.name -# # not adding if the field already in the output_spec -# if field_name not in current_output_spec_names: -# # TODO: should probably remove some of the keys -# new_fields.append( -# (field_name, attrs.field(type=File, metadata=fld.metadata)) -# ) -# output_spec.fields += new_fields -# return output_spec - - def get_available_cpus(): """ Return the number of CPUs available to the current process or, if that is not @@ -658,7 +633,7 @@ def is_lazy(obj): if is_lazy(obj): return True - for f in attr_fields(obj): + for f in attrs_fields(obj): if isinstance(getattr(obj, f.name), LazyField): return True return False diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index f846e40db2..339ae2ba62 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -10,7 +10,7 @@ from contextlib import contextmanager import attr from fileformats.core import FileSet -from pydra.engine.helpers import is_lazy +from pydra.engine.helpers import is_lazy, attrs_values logger = logging.getLogger("pydra") @@ -105,7 +105,7 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): """ - inputs_dict_st = attr.asdict(inputs, recurse=False) + inputs_dict_st = attrs_values(inputs) if map_copyfiles is not None: inputs_dict_st.update(map_copyfiles) @@ -114,12 +114,12 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): k = k.split(".")[1] inputs_dict_st[k] = inputs_dict_st[k][v] - from .specs import attr_fields + from .specs import attrs_fields # Collect templated inputs for which all requirements are satisfied. 
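+    # i.e. fields that define an "output_file_template", have not been explicitly
+    # disabled by setting their value to False, and whose "requires" fields are all set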
fields_templ = [ field - for field in attr_fields(inputs) + for field in attrs_fields(inputs) if field.metadata.get("output_file_template") and getattr(inputs, field.name) is not False and all( @@ -155,7 +155,7 @@ def template_update_single( from pydra.engine.specs import OUTPUT_TEMPLATE_TYPES if inputs_dict_st is None: - inputs_dict_st = attr.asdict(inputs, recurse=False) + inputs_dict_st = attrs_values(inputs) if spec_type == "input": inp_val_set = inputs_dict_st[field.name] diff --git a/pydra/engine/helpers_state.py b/pydra/engine/helpers_state.py index 866d408a46..76847079d4 100644 --- a/pydra/engine/helpers_state.py +++ b/pydra/engine/helpers_state.py @@ -1,11 +1,10 @@ """Additional functions used mostly by the State class.""" -import attr import itertools from copy import deepcopy import logging import typing as ty -from .helpers import ensure_list +from .helpers import ensure_list, attrs_values logger = logging.getLogger("pydra") @@ -622,9 +621,7 @@ def map_splits(split_iter, inputs, cont_dim=None): def inputs_types_to_dict(name, inputs): """Convert type.Inputs to dictionary.""" # dj: any better option? - input_names = [ - field for field in attr.asdict(inputs, recurse=False) if field != "_func" - ] + input_names = [field for field in attrs_values(inputs) if field != "_func"] inputs_dict = {} for field in input_names: inputs_dict[f"{name}.{field}"] = getattr(inputs, field) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 4b45e9cf7b..404983c48b 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -8,17 +8,26 @@ from glob import glob from typing_extensions import Self import attrs -from fileformats.generic import File, FileSet, Directory +from fileformats.generic import File from pydra.engine.audit import AuditFlag -from pydra.utils.typing import MultiOutputObj, MultiOutputFile -from .helpers import attr_fields, is_lazy +from pydra.utils.typing import TypeParser, MultiOutputObj +from .helpers import attrs_fields, attrs_values, is_lazy, list_fields, ensure_list from .helpers_file import template_update_single from pydra.utils.hash import hash_function, Cache +from pydra.design.base import Arg, Out +from pydra.design import shell + + +def is_set(value: ty.Any) -> bool: + """Check if a value has been set.""" + return value is not attrs.NOTHING class OutSpec: """Base class for all output specifications""" + RESERVED_FIELD_NAMES = ("split", "combine") + def split( self, splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, @@ -90,9 +99,6 @@ class TaskSpec(ty.Generic[OutSpecType]): Task: "ty.Type[core.Task]" - def __attrs_post_init__(self): - self._check_rules() - def __call__( self, name: str | None = None, @@ -106,7 +112,7 @@ def __call__( rerun=False, **kwargs, ): - self._check_for_unset_values() + self._check_rules() task = self.Task( self, name=name, @@ -121,19 +127,8 @@ def __call__( ) return task(**kwargs) - def _check_for_unset_values(self): - if unset := [ - k - for k, v in attrs.asdict(self, recurse=False).items() - if v is attrs.NOTHING - ]: - raise ValueError( - f"The following values {unset} in the {self!r} interface need to be set " - "before the workflow can be constructed" - ) - @property - def hash(self): + def _hash(self): hsh, self._hashes = self._compute_hashes() return hsh @@ -146,91 +141,76 @@ def _hash_changes(self): def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: """Compute a basic hash for any given set of fields.""" inp_dict = {} - for field in attr_fields( - self, exclude_names=("_graph_checksums", 
"bindings", "files_hash") - ): - if field.metadata.get("output_file_template"): - continue + for field in attrs_fields(self): + if isinstance(field, Out): + continue # Skip output fields # removing values that are not set from hash calculation if getattr(self, field.name) is attrs.NOTHING: continue - if "container_path" in field.metadata: + if getattr(field, "container_path", False): continue inp_dict[field.name] = getattr(self, field.name) hash_cache = Cache() field_hashes = { k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() } - if hasattr(self, "_graph_checksums"): - field_hashes["_graph_checksums"] = self._graph_checksums return hash_function(sorted(field_hashes.items())), field_hashes def _retrieve_values(self, wf, state_index=None): """Parse output results.""" temp_values = {} - for field in attr_fields(self): - # retrieving values that do not have templates - if not field.metadata.get("output_file_template"): - value = getattr(self, field.name) - if is_lazy(value): - temp_values[field.name] = value.get_value( - wf, state_index=state_index - ) + for field in attrs_fields(self): + value = getattr(self, field.name) + if is_lazy(value): + temp_values[field.name] = value.get_value(wf, state_index=state_index) for field, val in temp_values.items(): - value = path_to_string(value) setattr(self, field, val) def _check_rules(self): - fields = attr_fields(self) - - for field in fields: - field_is_mandatory = bool(field.metadata.get("mandatory")) - field_is_unset = getattr(self, field.name) is attrs.NOTHING + """Check if all rules are satisfied.""" - if field_is_unset and not field_is_mandatory: - continue + field: Arg + for field in list_fields(self): + value = getattr(self, field.name) # Collect alternative fields associated with this field. alternative_fields = { - name: getattr(self, name) is not attrs.NOTHING - for name in field.metadata.get("xor", []) - if name != field.name + name: getattr(self, name) for name in field.xor if name != field.name + } + set_alternatives = { + n: v for n, v in alternative_fields.items() if is_set(v) } - alternatives_are_set = any(alternative_fields.values()) # Raise error if no field in mandatory alternative group is set. - if field_is_unset: - if alternatives_are_set: + if not is_set(value): + if set_alternatives: continue message = f"{field.name} is mandatory and unset." if alternative_fields: raise AttributeError( message[:-1] - + f", but no alternative provided by {list(alternative_fields)}." + + f", and no alternative provided in {list(alternative_fields)}." ) else: raise AttributeError(message) # Raise error if multiple alternatives are set. - elif alternatives_are_set: - set_alternative_fields = [ - name for name, is_set in alternative_fields.items() if is_set - ] + elif set_alternatives: raise AttributeError( - f"{field.name} is mutually exclusive with {set_alternative_fields}" + f"{field.name} is mutually exclusive with {set_alternatives}" ) # Collect required fields associated with this field. required_fields = { - name: getattr(self, name) is not attrs.NOTHING - for name in field.metadata.get("requires", []) + name: is_set(getattr(self, name)) + for name in field.requires if name != field.name } # Raise error if any required field is unset. 
if not all(required_fields.values()): unset_required_fields = [ - name for name, is_set in required_fields.items() if not is_set + name for name, is_set_ in required_fields.items() if not is_set_ ] raise AttributeError(f"{field.name} requires {unset_required_fields}") @@ -258,7 +238,7 @@ class Result: def __getstate__(self): state = self.__dict__.copy() if state["output"] is not None: - fields = tuple((el.name, el.type) for el in attr_fields(state["output"])) + fields = tuple((el.name, el.type) for el in attrs_fields(state["output"])) state["output_spec"] = (state["output"].__class__.__name__, fields) state["output"] = attrs.asdict(state["output"], recurse=False) return state @@ -282,7 +262,7 @@ def get_output_field(self, field_name): Name of field in LazyField object """ if field_name == "all_": - return attrs.asdict(self.output, recurse=False) + return attrs_values(self.output) else: return getattr(self.output, field_name) @@ -339,12 +319,21 @@ class ShellOutSpec(OutSpec): stderr: str """The process' standard input.""" - def _collect_additional_outputs(self, inputs, output_dir, outputs): - from ..utils.typing import TypeParser + RESERVED_FIELD_NAMES = ("split", "combine", "return_code", "stdout", "stderr") - """Collect additional outputs from shelltask output_spec.""" - additional_out = {} - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): + @classmethod + def collect_outputs( + self, + inputs: "ShellSpec", + output_dir: Path, + return_code: int, + stdout: str, + stderr: str, + ) -> Self: + + outputs = Self(return_code=return_code, stdout=stdout, stderr=stderr) + fld: shell.out + for fld in list_fields(self): if not TypeParser.is_subclass( fld.type, ( @@ -361,135 +350,87 @@ def _collect_additional_outputs(self, inputs, output_dir, outputs): f"Support for {fld.type} type, required for '{fld.name}' in {self}, " "has not been implemented in collect_additional_output" ) - # assuming that field should have either default or metadata, but not both - input_value = getattr(inputs, fld.name, attrs.NOTHING) - if input_value is not attrs.NOTHING: - if TypeParser.contains_type(FileSet, fld.type): - if input_value is not False: - label = f"output field '{fld.name}' of {self}" - input_value = TypeParser(fld.type, label=label).coerce( - input_value - ) - additional_out[fld.name] = input_value - elif ( - fld.default is None or fld.default == attrs.NOTHING - ) and not fld.metadata: # TODO: is it right? 
- raise AttributeError("File has to have default value or metadata") - elif fld.default != attrs.NOTHING: - additional_out[fld.name] = self._field_defaultvalue(fld, output_dir) - elif fld.metadata: - if ( - fld.type in [int, float, bool, str, list] - and "callable" not in fld.metadata - ): + # Get the corresponding value from the inputs if it exists, which will be + # passed through to the outputs, to permit manual overrides + if isinstance(fld, shell.outarg) and is_set(getattr(inputs, fld.name)): + resolved_value = getattr(inputs, fld.name) + elif is_set(fld.default): + resolved_value = self._resolve_default_value(fld, output_dir) + else: + if fld.type in [int, float, bool, str, list] and not fld.callable: raise AttributeError( f"{fld.type} has to have a callable in metadata" ) - additional_out[fld.name] = self._field_metadata( - fld, inputs, output_dir, outputs + resolved_value = self._generate_implicit_value( + fld, inputs, output_dir, outputs, stdout, stderr ) - return additional_out + # Set the resolved value + setattr(outputs, fld.name, resolved_value) + return outputs - def _generated_output_names(self, inputs, output_dir): + def _generated_output_names( + self, inputs: "ShellSpec", output_dir: Path, stdout: str, stderr: str + ): """Returns a list of all outputs that will be generated by the task. Takes into account the task input and the requires list for the output fields. TODO: should be in all Output specs? """ # checking the input (if all mandatory fields are provided, etc.) - inputs.check_fields_input_spec() + inputs._check_rules() output_names = ["return_code", "stdout", "stderr"] - for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if fld.type not in [File, MultiOutputFile, Directory]: - raise Exception("not implemented (collect_additional_output)") + for fld in list_fields(self): # assuming that field should have either default or metadata, but not both - if ( - fld.default in (None, attrs.NOTHING) and not fld.metadata - ): # TODO: is it right? 
- raise AttributeError("File has to have default value or metadata") - elif fld.default != attrs.NOTHING: + if is_set(fld.default): output_names.append(fld.name) - elif ( - fld.metadata - and self._field_metadata( - fld, inputs, output_dir, outputs=None, check_existance=False - ) - != attrs.NOTHING + elif is_set( + self._generate_implicit_value(fld, inputs, output_dir, stdout, stderr) ): output_names.append(fld.name) return output_names - def _field_defaultvalue(self, fld, output_dir): - """Collect output file if the default value specified.""" - if not isinstance(fld.default, (str, Path)): - raise AttributeError( - f"{fld.name} is a File, so default value " - f"should be a string or a Path, " - f"{fld.default} provided" - ) + def _resolve_default_value(self, fld: shell.out, output_dir: Path) -> ty.Any: + """Resolve path and glob expr default values relative to the output dir""" default = fld.default - if isinstance(default, str): - default = Path(default) - - default = output_dir / default - if "*" not in str(default): - if default.exists(): - return default - else: - raise AttributeError(f"file {default} does not exist") - else: - all_files = [Path(el) for el in glob(str(default.expanduser()))] - if len(all_files) > 1: - return all_files - elif len(all_files) == 1: - return all_files[0] + if fld.type is Path: + assert isinstance(default, Path) + if not default.is_absolute(): + default = output_dir.joinpath(default) + if "*" not in str(default): + if default.exists(): + return default + else: + raise AttributeError(f"file {default} does not exist") else: - raise AttributeError(f"no file matches {default.name}") + all_files = [Path(el) for el in glob(default.expanduser())] + if len(all_files) > 1: + return all_files + elif len(all_files) == 1: + return all_files[0] + else: + raise AttributeError(f"no file matches {default.name}") + return default - def _field_metadata( - self, fld, inputs, output_dir, outputs=None, check_existance=True - ): + def _generate_implicit_value( + self, + fld: shell.out, + inputs: "ShellSpec", + output_dir: Path, + stdout: str, + stderr: str, + ) -> ty.Any: """Collect output file if metadata specified.""" - if self._check_requires(fld, inputs) is False: + if not self._required_fields_set(fld, inputs): return attrs.NOTHING - - if "value" in fld.metadata: - return output_dir / fld.metadata["value"] - # this block is only run if "output_file_template" is provided in output_spec - # if the field is set in input_spec with output_file_template, - # than the field already should have value - elif "output_file_template" in fld.metadata: - value = template_update_single( + elif isinstance(fld, shell.outarg) and fld.path_template: + return template_update_single( fld, inputs=inputs, output_dir=output_dir, spec_type="output" ) - - if fld.type is MultiOutputFile and type(value) is list: - # TODO: how to deal with mandatory list outputs - ret = [] - for val in value: - val = Path(val) - if check_existance and not val.exists(): - ret.append(attrs.NOTHING) - else: - ret.append(val) - return ret - else: - val = Path(value) - # checking if the file exists - if check_existance and not val.exists(): - # if mandatory raise exception - if "mandatory" in fld.metadata: - if fld.metadata["mandatory"]: - raise Exception( - f"mandatory output for variable {fld.name} does not exist" - ) - return attrs.NOTHING - return val - elif "callable" in fld.metadata: - callable_ = fld.metadata["callable"] - if isinstance(callable_, staticmethod): + elif fld.callable: + callable_ = fld.callable + if 
isinstance(fld.callable, staticmethod): # In case callable is defined as a static method, # retrieve the function wrapped in the descriptor. - callable_ = callable_.__func__ + callable_ = fld.callable.__func__ call_args = inspect.getfullargspec(callable_) call_args_val = {} for argnm in call_args.args: @@ -500,9 +441,9 @@ def _field_metadata( elif argnm == "inputs": call_args_val[argnm] = inputs elif argnm == "stdout": - call_args_val[argnm] = outputs["stdout"] + call_args_val[argnm] = stdout elif argnm == "stderr": - call_args_val[argnm] = outputs["stderr"] + call_args_val[argnm] = stderr else: try: call_args_val[argnm] = getattr(inputs, argnm) @@ -516,36 +457,34 @@ def _field_metadata( else: raise Exception( f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld.metadata}.' + f'("callable", "output_file_template" or "value"): {fld}.' ) - def _check_requires(self, fld, inputs): + def _required_fields_set(self, fld: shell.out, inputs: "ShellSpec") -> bool: """checking if all fields from the requires and template are set in the input if requires is a list of list, checking if at least one list has all elements set """ - from .helpers import ensure_list - - if "requires" in fld.metadata: - # if requires is a list of list it is treated as el[0] OR el[1] OR... - required_fields = ensure_list(fld.metadata["requires"]) - if all([isinstance(el, list) for el in required_fields]): - field_required_OR = required_fields - # if requires is a list of tuples/strings - I'm creating a 1-el nested list - elif all([isinstance(el, (str, tuple)) for el in required_fields]): - field_required_OR = [required_fields] - else: - raise Exception( - f"requires field can be a list of list, or a list " - f"of strings/tuples, but {fld.metadata['requires']} " - f"provided for {fld.name}" - ) + + if not fld.requires: + return True + + # if requires is a list of list it is treated as el[0] OR el[1] OR... 
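+        # e.g. requires=[["in_file", "mask"], ["in_file", "ref"]] is satisfied when either
+        # "in_file" and "mask", or "in_file" and "ref", are all set (field names illustrative)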
+ if all([isinstance(el, list) for el in fld.requires]): + field_required_OR = fld.requires + # if requires is a list of tuples/strings - I'm creating a 1-el nested list + elif all([isinstance(el, (str, tuple)) for el in fld.requires]): + field_required_OR = [fld.requires] else: - field_required_OR = [[]] + raise Exception( + f"requires field can be a list of list, or a list " + f"of strings/tuples, but {fld.metadata['requires']} " + f"provided for {fld.name}" + ) for field_required in field_required_OR: # if the output has output_file_template field, # adding all input fields from the template to requires - if self.path_template: + if isinstance(fld, shell.outarg) and self.path_template: # if a template is a function it has to be run first with the inputs as the only arg if callable(self.path_template): template = self.path_template(inputs) @@ -585,9 +524,7 @@ def _check_requires(self, fld, inputs): # if the specific list from field_required_OR has all elements set, no need to check more if required_found: break - - if not required_found: - raise ValueError("Did not find all required fields in the input") + return required_found class ShellSpec(TaskSpec): @@ -617,13 +554,4 @@ def reset(self): setattr(self, val, donothing) -def path_to_string(value): - """Convert paths to strings.""" - if isinstance(value, Path): - value = str(value) - elif isinstance(value, list) and len(value) and isinstance(value[0], Path): - value = [str(val) for val in value] - return value - - from pydra.engine import core # noqa: E402 diff --git a/pydra/engine/state.py b/pydra/engine/state.py index ffaddf3f3f..8fcfd67571 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -5,7 +5,7 @@ from functools import reduce import attrs from . import helpers_state as hlpst -from .helpers import ensure_list +from .helpers import ensure_list, attrs_values # from .specs import BaseSpec @@ -765,7 +765,7 @@ def prepare_states(self, inputs, cont_dim=None): else: self.cont_dim = {} if attrs.has(inputs): - self.inputs = attrs.asdict(inputs, recurse=False) + self.inputs = attrs_values(inputs) else: self.inputs = inputs if self.other_states: diff --git a/pydra/engine/task.py b/pydra/engine/task.py index f2d3a2283e..378f8ae5c3 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -51,13 +51,15 @@ from pathlib import Path import cloudpickle as cp from fileformats.core import FileSet -from .core import Task, is_lazy +from .core import Task from pydra.utils.messenger import AuditFlag from .specs import ( ShellSpec, - attr_fields, + attrs_fields, ) from .helpers import ( + attrs_values, + is_lazy, parse_format_string, position_sort, ensure_list, @@ -72,7 +74,7 @@ class FunctionTask(Task): """Wrap a Python callable as a task element.""" def _run_task(self, environment=None): - inputs = attr.asdict(self.inputs, recurse=False) + inputs = attrs_values(self.inputs) del inputs["_func"] self.output_ = None output = cp.loads(self.inputs._func)(**inputs) @@ -185,7 +187,7 @@ def command_args(self, root=None): pos_args = [] # list for (position, command arg) self._positions_provided = [] - for field in attr_fields(self.inputs): + for field in attrs_fields(self.inputs): name, meta = field.name, field.metadata if ( getattr(self.inputs, name) is attr.NOTHING @@ -285,7 +287,7 @@ def _command_pos_args(self, field, root=None): ): return None - inputs_dict = attr.asdict(self.inputs, recurse=False) + inputs_dict = attrs_values(self.inputs) cmd_add = [] # formatter that creates a custom command argument @@ -390,7 +392,7 @@ def 
_prepare_bindings(self, root: str): This updates the ``bindings`` attribute of the current task to make files available in an ``Environment``-defined ``root``. """ - for fld in attr_fields(self.inputs): + for fld in attrs_fields(self.inputs): if TypeParser.contains_type(FileSet, fld.type): fileset = getattr(self.inputs, fld.name) copy = parse_copyfile(fld)[0] == FileSet.CopyMode.copy @@ -439,7 +441,7 @@ def argstr_formatting(argstr, inputs, value_updates=None): """formatting argstr that have form {field_name}, using values from inputs and updating with value_update if provided """ - inputs_dict = attr.asdict(inputs, recurse=False) + inputs_dict = attrs_values(inputs) # if there is a value that has to be updated (e.g. single value from a list) if value_updates: inputs_dict.update(value_updates) diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 11a8434c22..357bbb5ce2 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -3,7 +3,7 @@ from operator import itemgetter from typing_extensions import Self import attrs -from pydra.engine.helpers import list_fields +from pydra.engine.helpers import list_fields, attrs_values from pydra.engine.specs import TaskSpec, OutSpec, WorkflowOutSpec from .lazy import LazyInField from pydra.utils.hash import hash_function @@ -49,11 +49,7 @@ def construct( lazy_input_names = {f.name for f in lazy_inputs} non_lazy_vals = tuple( sorted( - ( - i - for i in attrs.asdict(spec, recurse=False).items() - if i[0] not in lazy_input_names - ), + (i for i in attrs_values(spec).items() if i[0] not in lazy_input_names), key=itemgetter(0), ) ) @@ -84,7 +80,7 @@ def construct( ), ) - input_values = attrs.asdict(lazy_spec, recurse=False) + input_values = attrs_values(lazy_spec) constructor = input_values.pop("constructor") cls._under_construction = wf try: @@ -111,9 +107,7 @@ def construct( setattr(outputs, outpt.name, outpt_lf) else: if unset_outputs := [ - a - for a, v in attrs.asdict(outputs, recurse=False).items() - if v is attrs.NOTHING + a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING ]: raise ValueError( f"Expected outputs {unset_outputs} to be set by the " diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 197d3ca32d..f2fa14adba 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -5,7 +5,7 @@ from pydra.utils.typing import TypeParser, StateArray from . import lazy from ..specs import TaskSpec, OutSpec -from ..helpers import ensure_list +from ..helpers import ensure_list, attrs_values from .. import helpers_state as hlpst from ..state import State @@ -111,7 +111,7 @@ def state(self): @property def input_values(self) -> tuple[tuple[str, ty.Any]]: - return tuple(attrs.asdict(self._spec, recurse=False).items()) + return tuple(attrs_values(self._spec).items()) @property def lzout(self) -> OutputType: @@ -130,7 +130,7 @@ def lzout(self) -> OutputType: outputs = self.inputs.Outputs(**lazy_fields) # Flag the output lazy fields as being not typed checked (i.e. 
assigned to another # node's inputs) yet - for outpt in attrs.asdict(outputs, recurse=False).values(): + for outpt in attrs_values(outputs).values(): outpt.type_checked = False outputs._node = self self._lzout = outputs @@ -353,7 +353,7 @@ def _wrap_lzout_types_in_state_arrays(self) -> None: elif all(s not in self.state.combiner for s in split): remaining_splits.append(split) state_depth = len(remaining_splits) - for outpt_lf in attrs.asdict(self.lzout, recurse=False).values(): + for outpt_lf in attrs_values(self.lzout).values(): assert not outpt_lf.type_checked type_, _ = TypeParser.strip_splits(outpt_lf.type) for _ in range(state_depth): From d2f0bf3bc96315e07053e7f009f84d33ec4946ed Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 21:48:58 +1100 Subject: [PATCH 052/342] updated attrs required version to latest --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 871b9efc88..cfe924a9f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description = "Pydra dataflow engine" readme = "README.rst" requires-python = ">=3.10" dependencies = [ - "attrs >=19.1.0", + "attrs >=24.2.0", "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", From 35f489cb266cd9fd559d75580d5e9906fda0a1ec Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 21:49:21 +1100 Subject: [PATCH 053/342] debugged reworking of specs module --- pydra/design/base.py | 125 +++++++++++++-- pydra/design/python.py | 8 +- pydra/design/shell.py | 8 +- pydra/design/tests/test_workflow.py | 6 +- pydra/design/workflow.py | 20 ++- pydra/engine/core.py | 12 +- pydra/engine/helpers.py | 22 ++- pydra/engine/specs.py | 239 +++++++++++++++------------- pydra/engine/workflow/base.py | 2 +- 9 files changed, 287 insertions(+), 155 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index c54b8f31c8..f859e7f3fa 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -5,6 +5,7 @@ import enum from pathlib import Path from copy import copy +from typing_extensions import Self import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File @@ -58,9 +59,103 @@ def is_type(_, __, val: ty.Any) -> bool: def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: """Ensure the default value has been coerced into the correct type""" + if value is EMPTY: + return value return TypeParser[self_.type](self_.type, label=self_.name)(value) +@attrs.define +class Requirement: + """Define a requirement for a task input field + + Parameters + ---------- + name : str + The name of the input field that is required + allowed_values : list[str], optional + The allowed values for the input field that is required, if not provided any + value is allowed + """ + + name: str + allowed_values: list[str] = attrs.field(factory=list, converter=list) + + def satisfied(self, inputs: "TaskSpec") -> bool: + """Check if the requirement is satisfied by the inputs""" + value = getattr(inputs, self.name) + if value is attrs.NOTHING: + return False + return not self.allowed_values or value in self.allowed_values + + @classmethod + def parse(value: ty.Any) -> Self: + if isinstance(value, Requirement): + return value + elif isinstance(value, str): + return Requirement(value) + else: + name, allowed_values = value + if isinstance(allowed_values, str) or not isinstance( + allowed_values, ty.Collection + ): + raise ValueError( + f"allowed_values must be a collection of strings, not {allowed_values}" + ) 
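+            # construct the requirement from the validated (name, allowed_values) pair,
+            # e.g. ("method", ["linear", "nearest"]) (example values only)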
+ return Requirement(name, allowed_values) + + def __str__(self): + if not self.allowed_values: + return self.name + return f"{self.name}(" + ",".join(repr(v) for v in self.allowed_values) + ")" + + +def requirements_converter(value: ty.Any) -> list[Requirement]: + """Ensure the requires field is a list of Requirement objects""" + if isinstance(value, (str, tuple, Requirement)): + return [value] + return [Requirement.parse(v) for v in value] + + +@attrs.define +class RequirementSet: + """Define a set of requirements for a task input field, all of which must be satisfied""" + + requirements: list[Requirement] = attrs.field( + factory=list, + converter=requirements_converter, + ) + + def satisfied(self, inputs: "TaskSpec") -> bool: + """Check if all the requirements are satisfied by the inputs""" + return all(req.satisfied(inputs) for req in self.requirements) + + def __str__(self): + if len(self.requirements) == 1: + return str(self.requirements[0]) + return "+".join(str(r) for r in self.requirements) + + def __iter__(self): + return iter(self.requirements) + + def __iadd__(self, other: "RequirementSet | list[Requirement]") -> "RequirementSet": + self.requirements.extend(requirements_converter(other)) + return self + + +def requires_converter( + value: ( + str + | ty.Collection[ + Requirement | str | ty.Collection[str | tuple[str, ty.Collection[ty.Any]]] + ] + ), +) -> list[RequirementSet]: + """Ensure the requires field is a tuple of tuples""" + if isinstance(value, (str, tuple, Requirement)): + return [RequirementSet(value)] + return [RequirementSet(v) for v in value] + + @attrs.define(kw_only=True) class Field: """Base class for input and output fields to task specifications @@ -76,8 +171,11 @@ class Field: the default value for the field, by default it is EMPTY help_string: str, optional A short description of the input field. - requires: list, optional - Names of the inputs that are required together with the field. + requires: str | list[str | list[str] | Requirement], optional + The input fields that are required to be provided, along with the optional allowed + values, that are required together with the field. Can be provided + as a single name, a collection of names, a collection of collections of names, + or a collection of collection of name/allowed values pairs. converter: callable, optional The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional @@ -89,15 +187,19 @@ class Field: validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) ) default: ty.Any = attrs.field( - default=EMPTY, converter=attrs.Converter(convert_default_value, with_self=True) + default=EMPTY, converter=attrs.Converter(convert_default_value, takes_self=True) ) help_string: str = "" - requires: list[str] | list[list[str]] = attrs.field( - factory=list, converter=ensure_list + requires: list[RequirementSet] = attrs.field( + factory=list, converter=requires_converter ) converter: ty.Callable | None = None validator: ty.Callable | None = None + def requirements_satisfied(self, inputs: "TaskSpec") -> bool: + """Check if all the requirements are satisfied by the inputs""" + return any(req.satisfied(inputs) for req in self.requires) + @attrs.define(kw_only=True) class Arg(Field): @@ -118,7 +220,7 @@ class Arg(Field): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. 
- xor: list, optional + xor: list[str], optional Names of the inputs that are mutually exclusive with the field. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any @@ -133,8 +235,8 @@ class Arg(Field): it is False """ - allowed_values: list | None = None - xor: list | None = None + allowed_values: tuple = attrs.field(default=(), converter=tuple) + xor: tuple[str] = attrs.field(default=(), converter=tuple) copy_mode: File.CopyMode = File.CopyMode.any copy_collation: File.CopyCollation = File.CopyCollation.any copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single @@ -292,6 +394,8 @@ def make_task_spec( """ from pydra.engine.specs import TaskSpec + spec_type._check_arg_refs(inputs, outputs) + if name is None and klass is not None: name = klass.__name__ outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) @@ -326,6 +430,7 @@ def make_task_spec( # Now that we have saved the attributes in lists to be for arg in inputs.values(): # If an outarg input then the field type should be Path not a FileSet + default_kwargs = _get_default(arg) if isinstance(arg, Out) and is_fileset_or_union(arg.type): if getattr(arg, "path_template", False): if is_optional(arg.type): @@ -333,7 +438,7 @@ def make_task_spec( # Will default to None and not be inserted into the command else: field_type = Path | bool - arg.default = True + default_kwargs = {"default": True} elif is_optional(arg.type): field_type = Path | None else: @@ -348,7 +453,7 @@ def make_task_spec( validator=make_validator(arg, klass.__name__), metadata={PYDRA_ATTR_METADATA: arg}, on_setattr=attrs.setters.convert, - **_get_default(arg), + **default_kwargs, ), ) klass.__annotations__[arg.name] = field_type diff --git a/pydra/design/python.py b/pydra/design/python.py index 9de6860e1d..cde35e94a8 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -1,8 +1,6 @@ import typing as ty import inspect import attrs -from pydra.engine.task import FunctionTask -from pydra.engine.specs import PythonSpec, PythonOutSpec from .base import ( Arg, Out, @@ -14,6 +12,8 @@ extract_fields_from_class, ) +if ty.TYPE_CHECKING: + from pydra.engine.specs import PythonSpec __all__ = ["arg", "out", "define"] @@ -87,7 +87,7 @@ def define( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, -) -> PythonSpec: +) -> "PythonSpec": """ Create an interface for a function or a class. @@ -102,6 +102,8 @@ def define( auto_attribs : bool Whether to use auto_attribs mode when creating the class. 
""" + from pydra.engine.task import FunctionTask + from pydra.engine.specs import PythonSpec, PythonOutSpec def make(wrapped: ty.Callable | type) -> PythonSpec: if inspect.isclass(wrapped): diff --git a/pydra/design/shell.py b/pydra/design/shell.py index d6191943d9..a8c8d46b33 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -11,7 +11,6 @@ from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError -from pydra.engine.specs import ShellSpec, ShellOutSpec from pydra.engine.helpers import attrs_values from .base import ( Arg, @@ -23,8 +22,9 @@ EMPTY, ) from pydra.utils.typing import is_fileset_or_union, MultiInputObj -from pydra.engine.task import ShellCommandTask +if ty.TYPE_CHECKING: + from pydra.engine.specs import ShellSpec __all__ = ["arg", "out", "outarg", "define"] @@ -202,7 +202,7 @@ def define( outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, name: str | None = None, -) -> ShellSpec: +) -> "ShellSpec": """Create a task specification for a shell command. Can be used either as a decorator on the "canonical" dataclass-form of a task specification or as a function that takes a "shell-command template string" of the form @@ -254,6 +254,8 @@ def define( ShellSpec The interface for the shell command """ + from pydra.engine.task import ShellCommandTask + from pydra.engine.specs import ShellSpec, ShellOutSpec def make( wrapped: ty.Callable | type | None = None, diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index f4c1c0c19d..73acdf0396 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -6,7 +6,7 @@ import typing as ty from pydra.design import shell, python, workflow from pydra.engine.helpers import list_fields -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import WorkflowSpec, WorkflowOutSpec from fileformats import video, image # NB: We use PascalCase for interfaces and workflow functions as it is translated into a class @@ -138,7 +138,7 @@ def test_workflow_canonical(): # NB: We use PascalCase (i.e. class names) as it is translated into a class @workflow.define - class MyTestWorkflow(TaskSpec["MyTestWorkflow.Outputs"]): + class MyTestWorkflow(WorkflowSpec["MyTestWorkflow.Outputs"]): a: int b: float = workflow.arg( @@ -152,7 +152,7 @@ def constructor(a, b): mul = workflow.add(Mul(a=add.out, b=b)) return mul.out - class Outputs: + class Outputs(WorkflowOutSpec): out: float constructor = MyTestWorkflow().constructor diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 86a9f3ca9a..7c967910fa 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -1,8 +1,6 @@ import typing as ty import inspect import attrs -from pydra.engine.core import WorkflowTask -from pydra.engine.workflow.base import Workflow from .base import ( Arg, Out, @@ -13,7 +11,10 @@ check_explicit_fields_are_none, extract_fields_from_class, ) -from pydra.engine.specs import TaskSpec, OutSpec, WorkflowSpec, WorkflowOutSpec + +if ty.TYPE_CHECKING: + from pydra.engine.workflow.base import Workflow + from pydra.engine.specs import TaskSpec, OutSpec, WorkflowSpec __all__ = ["define", "add", "this", "arg", "out"] @@ -92,7 +93,7 @@ def define( outputs_bases: ty.Sequence[type] = (), lazy: list[str] | None = None, auto_attribs: bool = True, -) -> TaskSpec: +) -> "WorkflowSpec": """ Create an interface for a function or a class. 
Can be used either as a decorator on a constructor function or the "canonical" dataclass-form of a task specification. @@ -113,6 +114,9 @@ def define( TaskSpec The interface for the function or class. """ + from pydra.engine.core import WorkflowTask + from pydra.engine.specs import TaskSpec, WorkflowSpec, WorkflowOutSpec + if lazy is None: lazy = [] @@ -174,7 +178,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: return make -def this() -> Workflow: +def this() -> "Workflow": """Get the workflow currently being constructed. Returns @@ -182,13 +186,15 @@ def this() -> Workflow: Workflow The workflow currently being constructed. """ + from pydra.engine.workflow.base import Workflow + return Workflow.under_construction -OutSpecType = ty.TypeVar("OutSpecType", bound=OutSpec) +OutSpecType = ty.TypeVar("OutSpecType", bound="OutSpec") -def add(task_spec: TaskSpec[OutSpecType], name: str = None) -> OutSpecType: +def add(task_spec: "TaskSpec[OutSpecType]", name: str = None) -> OutSpecType: """Add a node to the workflow currently being constructed Parameters diff --git a/pydra/engine/core.py b/pydra/engine/core.py index f9c833ecbf..18631d38ea 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -23,7 +23,6 @@ Result, TaskHook, ) -from .workflow.lazy import is_lazy from .helpers import ( create_checksum, attrs_fields, @@ -35,6 +34,7 @@ record_error, PydraFileLock, parse_copyfile, + is_lazy, ) from pydra.utils.hash import hash_function from .helpers_file import copy_nested_files, template_update @@ -626,7 +626,7 @@ def pickle_task(self): def done(self): """Check whether the tasks has been finalized and all outputs are stored.""" # if any of the field is lazy, there is no need to check results - if is_lazy(self.inputs): + if has_lazy(self.inputs): return False _result = self.result() if self.state: @@ -1300,3 +1300,11 @@ def is_task(obj): def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" return isinstance(obj, WorkflowTask) + + +def has_lazy(obj): + """Check whether an object has lazy fields.""" + for f in attrs_fields(obj): + if is_lazy(getattr(obj, f.name)): + return True + return False diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index e2e69e4c06..c03ade9205 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -4,6 +4,7 @@ import asyncio.subprocess as asp from pathlib import Path import os +import inspect import sys from uuid import uuid4 import getpass @@ -15,7 +16,6 @@ import attrs from filelock import SoftFileLock, Timeout import cloudpickle as cp -from .helpers_file import copy_nested_files from fileformats.core import FileSet if ty.TYPE_CHECKING: @@ -36,13 +36,15 @@ def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: return attrs.asdict(obj, recurse=False, **kwargs) -def list_fields(interface: "TaskSpec") -> list["Field"]: +def list_fields(spec: "type[TaskSpec] | TaskSpec") -> list["Field"]: """List the fields of a task specification""" - if not attrs.has(interface): + if not inspect.isclass(spec): + spec = type(spec) + if not attrs.has(spec): return [] return [ f.metadata[PYDRA_ATTR_METADATA] - for f in attrs.fields(interface) + for f in attrs.fields(spec) if PYDRA_ATTR_METADATA in f.metadata ] @@ -154,6 +156,8 @@ def save(task_path: Path, result=None, task=None, name_prefix=None) -> None: def copyfile_workflow(wf_path: os.PathLike, result): """if file in the wf results, the file will be copied to the workflow directory""" + from .helpers_file import copy_nested_files + for field in 
attrs_fields(result.output): value = getattr(result.output, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run @@ -627,13 +631,7 @@ def ensure_list(obj, tuple2list=False): def is_lazy(obj): - """Check whether an object has any field that is a Lazy Field""" + """Check whether an object is a lazy field or has any attribute that is a Lazy Field""" from pydra.engine.workflow.lazy import LazyField - if is_lazy(obj): - return True - - for f in attrs_fields(obj): - if isinstance(getattr(obj, f.name), LazyField): - return True - return False + return isinstance(obj, LazyField) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 404983c48b..be1891d811 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -4,17 +4,19 @@ from pathlib import Path import re import inspect +import itertools import typing as ty from glob import glob +from copy import deepcopy from typing_extensions import Self import attrs from fileformats.generic import File from pydra.engine.audit import AuditFlag from pydra.utils.typing import TypeParser, MultiOutputObj -from .helpers import attrs_fields, attrs_values, is_lazy, list_fields, ensure_list +from .helpers import attrs_fields, attrs_values, is_lazy, list_fields from .helpers_file import template_update_single from pydra.utils.hash import hash_function, Cache -from pydra.design.base import Arg, Out +from pydra.design.base import Field, Arg, Out, RequirementSet from pydra.design import shell @@ -174,45 +176,65 @@ def _check_rules(self): value = getattr(self, field.name) # Collect alternative fields associated with this field. - alternative_fields = { - name: getattr(self, name) for name in field.xor if name != field.name - } - set_alternatives = { - n: v for n, v in alternative_fields.items() if is_set(v) - } - - # Raise error if no field in mandatory alternative group is set. - if not is_set(value): - if set_alternatives: - continue - message = f"{field.name} is mandatory and unset." - if alternative_fields: + if field.xor: + alternative_fields = { + name: getattr(self, name) + for name in field.xor + if name != field.name + } + set_alternatives = { + n: v for n, v in alternative_fields.items() if is_set(v) + } + + # Raise error if no field in mandatory alternative group is set. + if not is_set(value): + if set_alternatives: + continue + message = f"{field.name} is mandatory and unset." + if alternative_fields: + raise AttributeError( + message[:-1] + + f", and no alternative provided in {list(alternative_fields)}." + ) + else: + raise AttributeError(message) + + # Raise error if multiple alternatives are set. + elif set_alternatives: raise AttributeError( - message[:-1] - + f", and no alternative provided in {list(alternative_fields)}." + f"{field.name} is mutually exclusive with {set_alternatives}" ) - else: - raise AttributeError(message) - # Raise error if multiple alternatives are set. - elif set_alternatives: - raise AttributeError( - f"{field.name} is mutually exclusive with {set_alternatives}" + # Raise error if any required field is unset. + if field.requires and not any(rs.satisfied(self) for rs in field.requires): + raise ValueError( + f"{field.name} requires at least one of the requirement sets to be " + f"satisfied: {[str(r) for r in field.requires]}" ) - # Collect required fields associated with this field. - required_fields = { - name: is_set(getattr(self, name)) - for name in field.requires - if name != field.name - } - - # Raise error if any required field is unset. 
- if not all(required_fields.values()): - unset_required_fields = [ - name for name, is_set_ in required_fields.items() if not is_set_ - ] - raise AttributeError(f"{field.name} requires {unset_required_fields}") + @classmethod + def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: + """ + Checks if all fields referenced in requirements and xor are present in the inputs + are valid field names + """ + field: Field + input_names = set(inputs) + for field in itertools.chain(inputs.values(), outputs.values()): + if unrecognised := ( + set([r.name for s in field.requires for r in s.requirements]) + - input_names + ): + raise ValueError( + "'Unrecognised' field names in referenced in the requirements " + f"of {field} " + str(list(unrecognised)) + ) + for inpt in inputs.values(): + if unrecognised := set(inpt.xor) - input_names: + raise ValueError( + "'Unrecognised' field names in referenced in the xor " + f"of {inpt} " + str(list(unrecognised)) + ) @attrs.define(kw_only=True) @@ -296,7 +318,10 @@ class PythonOutSpec(OutSpec): pass -class PythonSpec(TaskSpec): +PythonOutSpecType = ty.TypeVar("OutputType", bound=PythonOutSpec) + + +class PythonSpec(TaskSpec[PythonOutSpecType]): pass @@ -304,36 +329,57 @@ class WorkflowOutSpec(OutSpec): pass -class WorkflowSpec(TaskSpec): +WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutSpec) + + +class WorkflowSpec(TaskSpec[WorkflowOutSpecType]): pass -@attrs.define(kw_only=True) class ShellOutSpec(OutSpec): """Output specification of a generic shell process.""" - return_code: int + return_code: int = shell.out() """The process' exit code.""" - stdout: str + stdout: str = shell.out() """The process' standard output.""" - stderr: str + stderr: str = shell.out() """The process' standard input.""" - RESERVED_FIELD_NAMES = ("split", "combine", "return_code", "stdout", "stderr") - @classmethod def collect_outputs( - self, + cls, inputs: "ShellSpec", output_dir: Path, - return_code: int, stdout: str, stderr: str, + return_code: int, ) -> Self: + """Collect the outputs of a shell process from a combination of the provided inputs, + the objects in the output directory, and the stdout and stderr of the process. + + Parameters + ---------- + inputs : ShellSpec + The input specification of the shell process. + output_dir : Path + The directory where the process was run. + stdout : str + The standard output of the process. + stderr : str + The standard error of the process. + return_code : int + The exit code of the process. 
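The eager reference check added above verifies that every name mentioned in a field's requires and xor metadata is a real input field. A minimal standalone sketch of that rule, with illustrative names rather than the real classes:

    input_names = {"in_file", "mask", "threshold"}

    def check_refs(field_name: str, xor: tuple, requires: tuple) -> None:
        # mirror of the check above: any reference to a name that is not an
        # input field is reported when the spec class is built, not at run time
        for label, referenced in (("xor", xor), ("requirements", requires)):
            unrecognised = set(referenced) - input_names
            if unrecognised:
                raise ValueError(
                    f"Unrecognised field names referenced in the {label} of "
                    f"{field_name}: {sorted(unrecognised)}"
                )

    check_refs("threshold", xor=("mask",), requires=("in_file",))   # passes
    # check_refs("threshold", xor=("maskk",), requires=())          # would raise ValueError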
+ + Returns + ------- + outputs : ShellOutSpec + The outputs of the shell process + """ - outputs = Self(return_code=return_code, stdout=stdout, stderr=stderr) + outputs = cls(return_code=return_code, stdout=stdout, stderr=stderr) fld: shell.out - for fld in list_fields(self): + for fld in list_fields(cls): if not TypeParser.is_subclass( fld.type, ( @@ -347,7 +393,7 @@ def collect_outputs( ), ): raise TypeError( - f"Support for {fld.type} type, required for '{fld.name}' in {self}, " + f"Support for {fld.type} type, required for '{fld.name}' in {cls}, " "has not been implemented in collect_additional_output" ) # Get the corresponding value from the inputs if it exists, which will be @@ -355,21 +401,22 @@ def collect_outputs( if isinstance(fld, shell.outarg) and is_set(getattr(inputs, fld.name)): resolved_value = getattr(inputs, fld.name) elif is_set(fld.default): - resolved_value = self._resolve_default_value(fld, output_dir) + resolved_value = cls._resolve_default_value(fld, output_dir) else: if fld.type in [int, float, bool, str, list] and not fld.callable: raise AttributeError( f"{fld.type} has to have a callable in metadata" ) - resolved_value = self._generate_implicit_value( + resolved_value = cls._generate_implicit_value( fld, inputs, output_dir, outputs, stdout, stderr ) # Set the resolved value setattr(outputs, fld.name, resolved_value) return outputs + @classmethod def _generated_output_names( - self, inputs: "ShellSpec", output_dir: Path, stdout: str, stderr: str + cls, inputs: "ShellSpec", output_dir: Path, stdout: str, stderr: str ): """Returns a list of all outputs that will be generated by the task. Takes into account the task input and the requires list for the output fields. @@ -378,17 +425,18 @@ def _generated_output_names( # checking the input (if all mandatory fields are provided, etc.) inputs._check_rules() output_names = ["return_code", "stdout", "stderr"] - for fld in list_fields(self): + for fld in list_fields(cls): # assuming that field should have either default or metadata, but not both if is_set(fld.default): output_names.append(fld.name) elif is_set( - self._generate_implicit_value(fld, inputs, output_dir, stdout, stderr) + cls._generate_implicit_value(fld, inputs, output_dir, stdout, stderr) ): output_names.append(fld.name) return output_names - def _resolve_default_value(self, fld: shell.out, output_dir: Path) -> ty.Any: + @classmethod + def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: """Resolve path and glob expr default values relative to the output dir""" default = fld.default if fld.type is Path: @@ -410,8 +458,9 @@ def _resolve_default_value(self, fld: shell.out, output_dir: Path) -> ty.Any: raise AttributeError(f"no file matches {default.name}") return default + @classmethod def _generate_implicit_value( - self, + cls, fld: shell.out, inputs: "ShellSpec", output_dir: Path, @@ -419,7 +468,7 @@ def _generate_implicit_value( stderr: str, ) -> ty.Any: """Collect output file if metadata specified.""" - if not self._required_fields_set(fld, inputs): + if not cls._required_fields_satisfied(fld, inputs): return attrs.NOTHING elif isinstance(fld, shell.outarg) and fld.path_template: return template_update_single( @@ -460,7 +509,8 @@ def _generate_implicit_value( f'("callable", "output_file_template" or "value"): {fld}.' 
) - def _required_fields_set(self, fld: shell.out, inputs: "ShellSpec") -> bool: + @classmethod + def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool: """checking if all fields from the requires and template are set in the input if requires is a list of list, checking if at least one list has all elements set """ @@ -468,63 +518,24 @@ def _required_fields_set(self, fld: shell.out, inputs: "ShellSpec") -> bool: if not fld.requires: return True - # if requires is a list of list it is treated as el[0] OR el[1] OR... - if all([isinstance(el, list) for el in fld.requires]): - field_required_OR = fld.requires - # if requires is a list of tuples/strings - I'm creating a 1-el nested list - elif all([isinstance(el, (str, tuple)) for el in fld.requires]): - field_required_OR = [fld.requires] + requirements: list[RequirementSet] + if fld.requires: + requirements = deepcopy(fld.requires) else: - raise Exception( - f"requires field can be a list of list, or a list " - f"of strings/tuples, but {fld.metadata['requires']} " - f"provided for {fld.name}" - ) - - for field_required in field_required_OR: - # if the output has output_file_template field, - # adding all input fields from the template to requires - if isinstance(fld, shell.outarg) and self.path_template: - # if a template is a function it has to be run first with the inputs as the only arg - if callable(self.path_template): - template = self.path_template(inputs) - inp_fields = re.findall(r"{(\w+)(?:\:[^\}]+)?}", template) - field_required += [ - el[1:-1] for el in inp_fields if el[1:-1] not in field_required - ] - - # it's a flag, of the field from the list is not in input it will be changed to False - required_found = True - for field_required in field_required_OR: - required_found = True - # checking if the input fields from requires have set values - for inp in field_required: - if isinstance(inp, str): # name of the input field - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" - ) - elif getattr(inputs, inp) in [attrs.NOTHING, None]: - required_found = False - break - elif isinstance(inp, tuple): # (name, allowed values) - inp, allowed_val = inp[0], ensure_list(inp[1]) - if not hasattr(inputs, inp): - raise Exception( - f"{inp} is not a valid input field, can't be used in requires" - ) - elif getattr(inputs, inp) not in allowed_val: - required_found = False - break - else: - raise Exception( - f"each element of the requires element should be a string or a tuple, " - f"but {inp} is found in {field_required}" - ) - # if the specific list from field_required_OR has all elements set, no need to check more - if required_found: - break - return required_found + requirements = [RequirementSet()] + + # if the output has output_file_template field, add in all input fields from + # the template to requires + if isinstance(fld, shell.outarg) and fld.path_template: + # if a template is a function it has to be run first with the inputs as the only arg + if callable(fld.path_template): + template = fld.path_template(inputs) + inp_fields = re.findall(r"{(\w+)(?:\:[^\}]+)?}", template) + for req in requirements: + req += inp_fields + + # Check to see if any of the requirement sets are satisfied + return any(rs.satisfied(inputs) for rs in requirements) class ShellSpec(TaskSpec): diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 357bbb5ce2..1f6fd7f680 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ 
-88,7 +88,7 @@ def construct( output_lazy_fields = constructor(**input_values) # Check to see whether any mandatory inputs are not set for node in wf.nodes: - node.inputs._check_for_unset_values() + node._spec._check_rules() # Check that the outputs are set correctly, either directly by the constructor # or via returned values that can be zipped with the output names if output_lazy_fields: From a9071e4e25c7c48947f08a7d994271d1625620ad Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 21:50:31 +1100 Subject: [PATCH 054/342] renamed OutSpec to Outputs --- pydra/design/base.py | 12 ++++++------ pydra/design/python.py | 4 ++-- pydra/design/shell.py | 4 ++-- pydra/design/workflow.py | 10 +++++----- pydra/engine/boutiques.py | 4 ++-- pydra/engine/specs.py | 16 ++++++++-------- pydra/engine/workflow/base.py | 6 +++--- pydra/engine/workflow/node.py | 4 ++-- pydra/mark/shell.py | 8 ++++---- pydra/utils/tests/utils.py | 6 +++--- 10 files changed, 37 insertions(+), 37 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index f859e7f3fa..f81dd6602a 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -26,7 +26,7 @@ if ty.TYPE_CHECKING: - from pydra.engine.specs import TaskSpec, OutSpec + from pydra.engine.specs import TaskSpec, Outputs from pydra.engine.core import Task __all__ = [ @@ -354,7 +354,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_spec( spec_type: type["TaskSpec"], - out_type: type["OutSpec"], + out_type: type["Outputs"], task_type: type["Task"], inputs: dict[str, Arg], outputs: dict[str, Out], @@ -466,11 +466,11 @@ def make_task_spec( def make_outputs_spec( - spec_type: type["OutSpec"], + spec_type: type["Outputs"], outputs: dict[str, Out], bases: ty.Sequence[type], spec_name: str, -) -> type["OutSpec"]: +) -> type["Outputs"]: """Create an outputs specification class and its outputs specification class from the output fields provided to the decorator/function. @@ -491,10 +491,10 @@ def make_outputs_spec( klass : type The class created using the attrs package """ - from pydra.engine.specs import OutSpec + from pydra.engine.specs import Outputs if not any(issubclass(b, spec_type) for b in bases): - if out_spec_bases := [b for b in bases if issubclass(b, OutSpec)]: + if out_spec_bases := [b for b in bases if issubclass(b, Outputs)]: raise ValueError( f"Cannot make {spec_type} output spec from {out_spec_bases} bases" ) diff --git a/pydra/design/python.py b/pydra/design/python.py index cde35e94a8..5f050ac11b 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -103,7 +103,7 @@ def define( Whether to use auto_attribs mode when creating the class. 
""" from pydra.engine.task import FunctionTask - from pydra.engine.specs import PythonSpec, PythonOutSpec + from pydra.engine.specs import PythonSpec, PythonOutputs def make(wrapped: ty.Callable | type) -> PythonSpec: if inspect.isclass(wrapped): @@ -142,7 +142,7 @@ def make(wrapped: ty.Callable | type) -> PythonSpec: interface = make_task_spec( PythonSpec, - PythonOutSpec, + PythonOutputs, FunctionTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index a8c8d46b33..4c08612fcd 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -255,7 +255,7 @@ def define( The interface for the shell command """ from pydra.engine.task import ShellCommandTask - from pydra.engine.specs import ShellSpec, ShellOutSpec + from pydra.engine.specs import ShellSpec, ShellOutputs def make( wrapped: ty.Callable | type | None = None, @@ -338,7 +338,7 @@ def make( interface = make_task_spec( ShellSpec, - ShellOutSpec, + ShellOutputs, ShellCommandTask, parsed_inputs, parsed_outputs, diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 7c967910fa..abe1320221 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -14,7 +14,7 @@ if ty.TYPE_CHECKING: from pydra.engine.workflow.base import Workflow - from pydra.engine.specs import TaskSpec, OutSpec, WorkflowSpec + from pydra.engine.specs import TaskSpec, Outputs, WorkflowSpec __all__ = ["define", "add", "this", "arg", "out"] @@ -115,7 +115,7 @@ def define( The interface for the function or class. """ from pydra.engine.core import WorkflowTask - from pydra.engine.specs import TaskSpec, WorkflowSpec, WorkflowOutSpec + from pydra.engine.specs import TaskSpec, WorkflowSpec, WorkflowOutputs if lazy is None: lazy = [] @@ -159,7 +159,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: interface = make_task_spec( WorkflowSpec, - WorkflowOutSpec, + WorkflowOutputs, WorkflowTask, parsed_inputs, parsed_outputs, @@ -191,7 +191,7 @@ def this() -> "Workflow": return Workflow.under_construction -OutSpecType = ty.TypeVar("OutSpecType", bound="OutSpec") +OutSpecType = ty.TypeVar("OutSpecType", bound="Outputs") def add(task_spec: "TaskSpec[OutSpecType]", name: str = None) -> OutSpecType: @@ -207,7 +207,7 @@ def add(task_spec: "TaskSpec[OutSpecType]", name: str = None) -> OutSpecType: Returns ------- - OutSpec + Outputs The outputs specification of the node """ return this().add(task_spec, name=name) diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py index 8202da6b99..3f1b7bb4b2 100644 --- a/pydra/engine/boutiques.py +++ b/pydra/engine/boutiques.py @@ -7,7 +7,7 @@ from pydra.utils.messenger import AuditFlag from pydra.engine.task import ShellCommandTask -from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutSpec, File, attrs_fields +from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutputs, File, attrs_fields from .helpers_file import is_local_file @@ -175,7 +175,7 @@ def _prepare_output_spec(self, names_subset=None): if names_subset: raise RuntimeError(f"{names_subset} are not in the zenodo output spec") - spec = SpecInfo(name="Outputs", fields=fields, bases=(ShellOutSpec,)) + spec = SpecInfo(name="Outputs", fields=fields, bases=(ShellOutputs,)) return spec def _command_args_single(self, state_ind=None, index=None): diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index be1891d811..fb74e2b5d0 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -25,7 +25,7 @@ def is_set(value: ty.Any) -> bool: return value is not attrs.NOTHING -class OutSpec: 
+class Outputs: """Base class for all output specifications""" RESERVED_FIELD_NAMES = ("split", "combine") @@ -93,7 +93,7 @@ def combine( return self -OutSpecType = ty.TypeVar("OutputType", bound=OutSpec) +OutSpecType = ty.TypeVar("OutputType", bound=Outputs) class TaskSpec(ty.Generic[OutSpecType]): @@ -314,29 +314,29 @@ class RuntimeSpec: network: bool = False -class PythonOutSpec(OutSpec): +class PythonOutputs(Outputs): pass -PythonOutSpecType = ty.TypeVar("OutputType", bound=PythonOutSpec) +PythonOutSpecType = ty.TypeVar("OutputType", bound=PythonOutputs) class PythonSpec(TaskSpec[PythonOutSpecType]): pass -class WorkflowOutSpec(OutSpec): +class WorkflowOutputs(Outputs): pass -WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutSpec) +WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutputs) class WorkflowSpec(TaskSpec[WorkflowOutSpecType]): pass -class ShellOutSpec(OutSpec): +class ShellOutputs(Outputs): """Output specification of a generic shell process.""" return_code: int = shell.out() @@ -373,7 +373,7 @@ def collect_outputs( Returns ------- - outputs : ShellOutSpec + outputs : ShellOutputs The outputs of the shell process """ diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 1f6fd7f680..e14f581ec9 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -4,15 +4,15 @@ from typing_extensions import Self import attrs from pydra.engine.helpers import list_fields, attrs_values -from pydra.engine.specs import TaskSpec, OutSpec, WorkflowOutSpec +from pydra.engine.specs import TaskSpec, Outputs, WorkflowOutputs from .lazy import LazyInField from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from .node import Node -OutSpecType = ty.TypeVar("OutputType", bound=OutSpec) -WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutSpec) +OutSpecType = ty.TypeVar("OutputType", bound=Outputs) +WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutputs) @attrs.define(auto_attribs=False) diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index f2fa14adba..307610d7c5 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -4,7 +4,7 @@ import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy -from ..specs import TaskSpec, OutSpec +from ..specs import TaskSpec, Outputs from ..helpers import ensure_list, attrs_values from .. 
import helpers_state as hlpst from ..state import State @@ -13,7 +13,7 @@ from .base import Workflow -OutputType = ty.TypeVar("OutputType", bound=OutSpec) +OutputType = ty.TypeVar("OutputType", bound=Outputs) Splitter = ty.Union[str, ty.Tuple[str, ...]] _not_set = Enum("_not_set", "NOT_SET") diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py index d0cde91337..0f700d6970 100644 --- a/pydra/mark/shell.py +++ b/pydra/mark/shell.py @@ -95,7 +95,7 @@ def ensure_base_included(base_class: type, bases_list: list[type]): # Ensure bases are lists and can be modified ensure_base_included(pydra.engine.task.ShellCommandTask, bases) ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) - ensure_base_included(pydra.engine.specs.ShellOutSpec, outputs_bases) + ensure_base_included(pydra.engine.specs.ShellOutputs, outputs_bases) def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): annotations = {} @@ -161,7 +161,7 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): try: Outputs = klass.Outputs except AttributeError: - Outputs = type("Outputs", (pydra.engine.specs.ShellOutSpec,), {}) + Outputs = type("Outputs", (pydra.engine.specs.ShellOutputs,), {}) # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. # not in a base class) @@ -177,8 +177,8 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): template_fields = _gen_output_template_fields(Inputs, Outputs) - if not issubclass(Outputs, pydra.engine.specs.ShellOutSpec): - outputs_bases = (Outputs, pydra.engine.specs.ShellOutSpec) + if not issubclass(Outputs, pydra.engine.specs.ShellOutputs): + outputs_bases = (Outputs, pydra.engine.specs.ShellOutputs) add_base_class = True else: outputs_bases = (Outputs,) diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 3582fa9eda..0a65c780d7 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -65,7 +65,7 @@ def generic_func_task(in_file: File) -> File: ), ] generic_shelloutput_spec = specs.SpecInfo( - name="Output", fields=generic_shell_output_fields, bases=(specs.ShellOutSpec,) + name="Output", fields=generic_shell_output_fields, bases=(specs.ShellOutputs,) ) @@ -117,7 +117,7 @@ def specific_func_task(in_file: MyFormatX) -> MyFormatX: ), ] specific_shelloutput_spec = specs.SpecInfo( - name="Output", fields=specific_shell_output_fields, bases=(specs.ShellOutSpec,) + name="Output", fields=specific_shell_output_fields, bases=(specs.ShellOutputs,) ) @@ -171,7 +171,7 @@ def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: other_specific_shelloutput_spec = specs.SpecInfo( name="Output", fields=other_specific_shell_output_fields, - bases=(specs.ShellOutSpec,), + bases=(specs.ShellOutputs,), ) From 1b5035030850e0257d19eb859449e0d1f3c46faf Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 21:53:09 +1100 Subject: [PATCH 055/342] renamed FunctionTask -> PythonTask and ShellCommandTask -> ShellTask for consistent naming convention with Spec & Outputs classes --- pydra/design/python.py | 4 ++-- pydra/design/shell.py | 6 +++--- pydra/engine/boutiques.py | 4 ++-- pydra/engine/task.py | 4 ++-- pydra/mark/functions.py | 6 +++--- pydra/mark/shell.py | 8 ++++---- pydra/utils/tests/utils.py | 8 ++++---- 7 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pydra/design/python.py b/pydra/design/python.py index 5f050ac11b..610979910c 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -102,7 +102,7 @@ def define( auto_attribs : bool 
Whether to use auto_attribs mode when creating the class. """ - from pydra.engine.task import FunctionTask + from pydra.engine.task import PythonTask from pydra.engine.specs import PythonSpec, PythonOutputs def make(wrapped: ty.Callable | type) -> PythonSpec: @@ -143,7 +143,7 @@ def make(wrapped: ty.Callable | type) -> PythonSpec: interface = make_task_spec( PythonSpec, PythonOutputs, - FunctionTask, + PythonTask, parsed_inputs, parsed_outputs, name=name, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 4c08612fcd..94e96b23ba 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -1,4 +1,4 @@ -"""Decorators and helper functions to create ShellCommandTasks used in Pydra workflows""" +"""Decorators and helper functions to create ShellTasks used in Pydra workflows""" from __future__ import annotations import typing as ty @@ -254,7 +254,7 @@ def define( ShellSpec The interface for the shell command """ - from pydra.engine.task import ShellCommandTask + from pydra.engine.task import ShellTask from pydra.engine.specs import ShellSpec, ShellOutputs def make( @@ -339,7 +339,7 @@ def make( interface = make_task_spec( ShellSpec, ShellOutputs, - ShellCommandTask, + ShellTask, parsed_inputs, parsed_outputs, name=class_name, diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py index 3f1b7bb4b2..8d7782b3e5 100644 --- a/pydra/engine/boutiques.py +++ b/pydra/engine/boutiques.py @@ -6,12 +6,12 @@ from functools import reduce from pydra.utils.messenger import AuditFlag -from pydra.engine.task import ShellCommandTask +from pydra.engine.task import ShellTask from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutputs, File, attrs_fields from .helpers_file import is_local_file -class BoshTask(ShellCommandTask): +class BoshTask(ShellTask): """Shell Command Task based on the Boutiques descriptor""" def __init__( diff --git a/pydra/engine/task.py b/pydra/engine/task.py index 378f8ae5c3..78ab415d38 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -70,7 +70,7 @@ from .environments import Native -class FunctionTask(Task): +class PythonTask(Task): """Wrap a Python callable as a task element.""" def _run_task(self, environment=None): @@ -95,7 +95,7 @@ def _run_task(self, environment=None): ) -class ShellCommandTask(Task): +class ShellTask(Task): """Wrap a shell command as a task element.""" def __init__( diff --git a/pydra/mark/functions.py b/pydra/mark/functions.py index d3bdaa9b03..c8b45fc265 100644 --- a/pydra/mark/functions.py +++ b/pydra/mark/functions.py @@ -30,7 +30,7 @@ def decorate(func): def task(func): """ - Promote a function to a :class:`~pydra.engine.task.FunctionTask`. + Promote a function to a :class:`~pydra.engine.task.PythonTask`. Example ------- @@ -40,10 +40,10 @@ def task(func): ... 
return a ** 2.0 """ - from pydra.engine.task import FunctionTask + from pydra.engine.task import PythonTask @wraps(func) def decorate(**kwargs): - return FunctionTask(func=func, **kwargs) + return PythonTask(func=func, **kwargs) return decorate diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py index 0f700d6970..869a24362d 100644 --- a/pydra/mark/shell.py +++ b/pydra/mark/shell.py @@ -1,4 +1,4 @@ -"""Decorators and helper functions to create ShellCommandTasks used in Pydra workflows""" +"""Decorators and helper functions to create ShellTasks used in Pydra workflows""" from __future__ import annotations import typing as ty @@ -93,7 +93,7 @@ def ensure_base_included(base_class: type, bases_list: list[type]): pass # Ensure bases are lists and can be modified - ensure_base_included(pydra.engine.task.ShellCommandTask, bases) + ensure_base_included(pydra.engine.task.ShellTask, bases) ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) ensure_base_included(pydra.engine.specs.ShellOutputs, outputs_bases) @@ -140,8 +140,8 @@ def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): name = klass.__name__ bases = [klass] - if not issubclass(klass, pydra.engine.task.ShellCommandTask): - bases.append(pydra.engine.task.ShellCommandTask) + if not issubclass(klass, pydra.engine.task.ShellTask): + bases.append(pydra.engine.task.ShellTask) try: executable = klass.executable diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 0a65c780d7..9760704b38 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -1,7 +1,7 @@ from fileformats.generic import File from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber from pydra import mark -from pydra.engine.task import ShellCommandTask +from pydra.engine.task import ShellTask from pydra.engine import specs @@ -69,7 +69,7 @@ def generic_func_task(in_file: File) -> File: ) -class GenericShellTask(ShellCommandTask): +class GenericShellTask(ShellTask): input_spec = generic_shell_input_spec output_spec = generic_shelloutput_spec executable = "echo" @@ -121,7 +121,7 @@ def specific_func_task(in_file: MyFormatX) -> MyFormatX: ) -class SpecificShellTask(ShellCommandTask): +class SpecificShellTask(ShellTask): input_spec = specific_shell_input_spec output_spec = specific_shelloutput_spec executable = "echo" @@ -175,7 +175,7 @@ def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: ) -class OtherSpecificShellTask(ShellCommandTask): +class OtherSpecificShellTask(ShellTask): input_spec = other_specific_shell_input_spec output_spec = other_specific_shelloutput_spec executable = "echo" From 51c911b40555ed19e937d93176f6f290c23fadfd Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 22:24:30 +1100 Subject: [PATCH 056/342] debugged design unittests --- pydra/design/base.py | 6 +- pydra/design/shell.py | 10 +- pydra/design/tests/test_python.py | 38 +- pydra/design/tests/test_shell.py | 65 ++-- pydra/design/tests/test_workflow.py | 4 +- pydra/design/workflow.py | 4 +- pydra/engine/specs.py | 20 +- pydra/engine/tests/test_boutiques.py | 4 +- pydra/engine/tests/test_dockertask.py | 48 ++- pydra/engine/tests/test_environments.py | 46 +-- pydra/engine/tests/test_helpers_file.py | 4 +- pydra/engine/tests/test_nipype1_convert.py | 16 +- pydra/engine/tests/test_shelltask.py | 326 +++++++++--------- .../engine/tests/test_shelltask_inputspec.py | 164 ++++----- pydra/engine/tests/test_singularity.py | 42 +-- pydra/engine/tests/test_specs.py | 4 +- pydra/engine/tests/test_task.py 
| 24 +- pydra/engine/tests/test_workflow.py | 2 +- pydra/engine/workflow/base.py | 16 +- pydra/mark/tests/test_functions.py | 4 +- 20 files changed, 409 insertions(+), 438 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index f81dd6602a..d50f13c326 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -88,7 +88,7 @@ def satisfied(self, inputs: "TaskSpec") -> bool: return not self.allowed_values or value in self.allowed_values @classmethod - def parse(value: ty.Any) -> Self: + def parse(cls, value: ty.Any) -> Self: if isinstance(value, Requirement): return value elif isinstance(value, str): @@ -111,8 +111,10 @@ def __str__(self): def requirements_converter(value: ty.Any) -> list[Requirement]: """Ensure the requires field is a list of Requirement objects""" - if isinstance(value, (str, tuple, Requirement)): + if isinstance(value, Requirement): return [value] + elif isinstance(value, (str, tuple)): + return [Requirement.parse(value)] return [Requirement.parse(v) for v in value] diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 94e96b23ba..c38c333151 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -28,6 +28,11 @@ __all__ = ["arg", "out", "outarg", "define"] +EXECUTABLE_HELP_STRING = ( + "the first part of the command, can be a string, " + "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" +) + @attrs.define(kw_only=True) class arg(Arg): @@ -324,10 +329,7 @@ def make( argstr="", position=0, default=executable, - help_string=( - "the first part of the command, can be a string, " - "e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']" - ), + help_string=EXECUTABLE_HELP_STRING, ) # Set positions for the remaining inputs that don't have an explicit position diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 54dcd0fda4..58f99e1317 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -4,9 +4,9 @@ import attrs import pytest from pydra.engine.helpers import list_fields -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import PythonSpec from pydra.design import python -from pydra.engine.task import FunctionTask +from pydra.engine.task import PythonTask sort_key = attrgetter("name") @@ -19,7 +19,7 @@ def func(a: int) -> float: SampleInterface = python.define(func) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, PythonSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -40,7 +40,7 @@ def func(a: int, k: float = 2.0) -> float: SampleInterface = python.define(func) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, PythonSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -65,7 +65,7 @@ def func(a: int) -> float: outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, PythonSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -90,7 +90,7 @@ def func(a: int) -> int: outputs={"b": float}, ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, PythonSpec) inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = 
sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -110,8 +110,8 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" return a + b, a * b - assert issubclass(SampleInterface, TaskSpec) - assert SampleInterface.Task is FunctionTask + assert issubclass(SampleInterface, PythonSpec) + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -139,7 +139,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: d = a * b return c, d - assert SampleInterface.Task is FunctionTask + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -171,7 +171,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is FunctionTask + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -206,7 +206,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is FunctionTask + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -249,7 +249,7 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is FunctionTask + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -296,8 +296,8 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, TaskSpec) - assert SampleInterface.Task is FunctionTask + assert issubclass(SampleInterface, PythonSpec) + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -321,7 +321,7 @@ def function(a, b): def test_interface_with_inheritance(): @python.define - class SampleInterface(TaskSpec["SampleInterface.Outputs"]): + class SampleInterface(PythonSpec["SampleInterface.Outputs"]): """Sample class for testing Args: @@ -347,7 +347,7 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, PythonSpec) def test_interface_with_class_no_auto_attribs(): @@ -368,7 +368,7 @@ class Outputs: def function(a, b): return a + b, a * b - assert SampleInterface.Task is FunctionTask + assert SampleInterface.Task is PythonTask inputs = sorted(list_fields(SampleInterface), key=sort_key) outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) assert inputs == [ @@ -397,7 +397,7 @@ def test_interface_invalid_wrapped1(): with pytest.raises(ValueError): @python.define(inputs={"a": python.arg()}) - class SampleInterface(TaskSpec["SampleInterface.Outputs"]): + class SampleInterface(PythonSpec["SampleInterface.Outputs"]): a: int class Outputs: @@ -412,7 +412,7 @@ def test_interface_invalid_wrapped2(): with pytest.raises(ValueError): @python.define(outputs={"b": python.out()}) - class 
SampleInterface(TaskSpec["SampleInterface.Outputs"]): + class SampleInterface(PythonSpec["SampleInterface.Outputs"]): a: int class Outputs: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 6d4dc3cac5..3b8e3fe7f3 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -1,11 +1,12 @@ import os +import typing as ty from pathlib import Path import attrs import pytest import cloudpickle as cp from pydra.design import shell from pydra.engine.helpers import list_fields -from pydra.engine.specs import TaskSpec +from pydra.engine.specs import ShellSpec from fileformats.generic import File, Directory, FsObject from fileformats import text, image from pydra.utils.typing import MultiInputObj @@ -15,16 +16,21 @@ def test_interface_template(): SampleInterface = shell.define("cp ") - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, ShellSpec) output = shell.outarg( name="out_path", path_template="out_path", - default=True, type=FsObject, position=2, ) assert sorted_fields(SampleInterface) == [ - shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg( + name="executable", + default="cp", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), shell.arg(name="in_path", type=FsObject, position=1), output, ] @@ -41,16 +47,21 @@ def test_interface_template_w_types_and_path_template_ext(): "trim-png " ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, ShellSpec) output = shell.outarg( name="out_image", path_template="out_image.png", - default=True, type=image.Png, position=2, ) assert sorted_fields(SampleInterface) == [ - shell.arg(name="executable", default="trim-png", type=str, position=0), + shell.arg( + name="executable", + default="trim-png", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), shell.arg(name="in_image", type=image.Png, position=1), output, ] @@ -72,16 +83,21 @@ def test_interface_template_more_complex(): ), ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, ShellSpec) output = shell.outarg( name="out_dir", type=Directory, path_template="out_dir", position=2, - default=True, ) assert sorted_fields(SampleInterface) == [ - shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg( + name="executable", + default="cp", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), @@ -120,16 +136,21 @@ def test_interface_template_with_overrides(): outputs={"out_dir": shell.outarg(position=-1)}, ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, ShellSpec) output = shell.outarg( name="out_dir", type=Directory, path_template="out_dir", position=-1, - default=True, ) assert sorted_fields(SampleInterface) == [ - shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg( + name="executable", + default="cp", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), @@ -167,16 +188,21 @@ def test_interface_template_with_type_overrides(): inputs={"text_arg": str, "int_arg": int | None}, ) - assert issubclass(SampleInterface, TaskSpec) + assert issubclass(SampleInterface, ShellSpec) output = shell.outarg( name="out_dir", 
type=Directory, path_template="out_dir", position=2, - default=True, ) assert sorted_fields(SampleInterface) == [ - shell.arg(name="executable", default="cp", type=str, position=0), + shell.arg( + name="executable", + default="cp", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), @@ -199,7 +225,7 @@ def Ls(request): if request.param == "static": @shell.define - class Ls(TaskSpec["Ls.Outputs"]): + class Ls(ShellSpec["Ls.Outputs"]): executable = "ls" directory: Directory = shell.arg( @@ -438,7 +464,6 @@ class Outputs: type=File, help_string="the output file", path_template="{x}_out", - default=True, argstr="", position=-1, ) @@ -446,9 +471,10 @@ class Outputs: shell.arg( name="executable", default="cp", - type=str, + type=str | ty.Sequence[str], argstr="", position=0, + help_string=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="x", @@ -480,7 +506,6 @@ def test_shell_output_field_name_dynamic(): help_string="path of output file", argstr="", path_template="{x}_out", - default=True, ), }, ) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 73acdf0396..1480bdbe6f 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -6,7 +6,7 @@ import typing as ty from pydra.design import shell, python, workflow from pydra.engine.helpers import list_fields -from pydra.engine.specs import WorkflowSpec, WorkflowOutSpec +from pydra.engine.specs import WorkflowSpec, WorkflowOutputs from fileformats import video, image # NB: We use PascalCase for interfaces and workflow functions as it is translated into a class @@ -152,7 +152,7 @@ def constructor(a, b): mul = workflow.add(Mul(a=add.out, b=b)) return mul.out - class Outputs(WorkflowOutSpec): + class Outputs(WorkflowOutputs): out: float constructor = MyTestWorkflow().constructor diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index abe1320221..564ab8e09f 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -191,10 +191,10 @@ def this() -> "Workflow": return Workflow.under_construction -OutSpecType = ty.TypeVar("OutSpecType", bound="Outputs") +OutputsType = ty.TypeVar("OutputsType", bound="Outputs") -def add(task_spec: "TaskSpec[OutSpecType]", name: str = None) -> OutSpecType: +def add(task_spec: "TaskSpec[OutputsType]", name: str = None) -> OutputsType: """Add a node to the workflow currently being constructed Parameters diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index fb74e2b5d0..4960c88c82 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -93,10 +93,10 @@ def combine( return self -OutSpecType = ty.TypeVar("OutputType", bound=Outputs) +OutputsType = ty.TypeVar("OutputType", bound=Outputs) -class TaskSpec(ty.Generic[OutSpecType]): +class TaskSpec(ty.Generic[OutputsType]): """Base class for all task specifications""" Task: "ty.Type[core.Task]" @@ -222,8 +222,7 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: input_names = set(inputs) for field in itertools.chain(inputs.values(), outputs.values()): if unrecognised := ( - set([r.name for s in field.requires for r in s.requirements]) - - input_names + set([r.name for s in field.requires for r in s]) - input_names ): raise ValueError( "'Unrecognised' field names in referenced in the requirements " @@ -318,10 +317,10 @@ class PythonOutputs(Outputs): pass -PythonOutSpecType = ty.TypeVar("OutputType", 
bound=PythonOutputs) +PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) -class PythonSpec(TaskSpec[PythonOutSpecType]): +class PythonSpec(TaskSpec[PythonOutputsType]): pass @@ -329,10 +328,10 @@ class WorkflowOutputs(Outputs): pass -WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutputs) +WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) -class WorkflowSpec(TaskSpec[WorkflowOutSpecType]): +class WorkflowSpec(TaskSpec[WorkflowOutputsType]): pass @@ -538,7 +537,10 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool return any(rs.satisfied(inputs) for rs in requirements) -class ShellSpec(TaskSpec): +ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) + + +class ShellSpec(TaskSpec[ShellOutputsType]): pass diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index 48f484b687..28da1f176a 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -4,7 +4,7 @@ import pytest from ..core import Workflow -from ..task import ShellCommandTask +from ..task import ShellTask from ..submitter import Submitter from ..boutiques import BoshTask from .utils import result_no_submitter, result_submitter, no_win @@ -151,7 +151,7 @@ def test_boutiques_wf_2(maskfile, plugin, tmpdir, infile): name="stat", zenodo_id="4472771", input_file=wf.bet.lzout.outfile, v=True ) ) - wf.add(ShellCommandTask(name="cat", executable="cat", args=wf.stat.lzout.output)) + wf.add(ShellTask(name="cat", executable="cat", args=wf.stat.lzout.output)) wf.set_output( [ diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 5ccf37e292..5f69584d60 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -2,10 +2,10 @@ import pytest import attr -from ..task import ShellCommandTask +from ..task import ShellTask from ..submitter import Submitter from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec +from ..specs import ShellOutputs, SpecInfo, File, ShellSpec from ..environments import Docker from .utils import no_win, need_docker, result_submitter, result_no_submitter @@ -17,9 +17,7 @@ def test_docker_1_nosubm(): no submitter """ cmd = "whoami" - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") - ) + docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) assert docky.environment.image == "busybox" assert docky.environment.tag == "latest" assert isinstance(docky.environment, Docker) @@ -37,9 +35,7 @@ def test_docker_1(plugin): using submitter """ cmd = "whoami" - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") - ) + docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) with Submitter(plugin=plugin) as sub: docky(submitter=sub) @@ -57,9 +53,7 @@ def test_docker_2(results_function, plugin): with and without submitter """ cmd = ["echo", "hail", "pydra"] - docky = ShellCommandTask( - name="docky", executable=cmd, environment=Docker(image="busybox") - ) + docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) # cmdline doesn't know anything about docker assert docky.cmdline == " ".join(cmd) res = results_function(docky, plugin) @@ -77,7 +71,7 @@ def test_docker_2a(results_function, plugin): cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - docky = 
ShellCommandTask( + docky = ShellTask( name="docky", executable=cmd_exec, args=cmd_args, @@ -102,7 +96,7 @@ def test_docker_st_1(results_function, plugin): splitter = executable """ cmd = ["pwd", "whoami"] - docky = ShellCommandTask(name="docky", environment=Docker(image="busybox")).split( + docky = ShellTask(name="docky", environment=Docker(image="busybox")).split( "executable", executable=cmd ) assert docky.state.splitter == "docky.executable" @@ -127,9 +121,9 @@ def test_docker_outputspec_1(plugin, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="ubuntu"), executable=cmd, @@ -175,7 +169,7 @@ def test_docker_inputspec_1(tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -215,7 +209,7 @@ def test_docker_inputspec_1a(tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -271,7 +265,7 @@ def test_docker_inputspec_2(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -330,7 +324,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -391,7 +385,7 @@ def test_docker_inputspec_2a(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -434,7 +428,7 @@ def test_docker_inputspec_3(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -492,7 +486,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -547,7 +541,7 @@ def test_docker_inputspec_state_1(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -596,7 +590,7 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): bases=(ShellSpec,), ) - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=cmd, @@ -642,7 +636,7 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): wf.inputs.cmd = cmd wf.inputs.file = filename - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=wf.lzin.cmd, @@ -697,7 +691,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): wf.split(file=[str(file_1), str(file_2)]) wf.inputs.cmd = cmd - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=wf.lzin.cmd, @@ -752,7 +746,7 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): wf = Workflow(name="wf", input_spec=["cmd", "file"]) wf.inputs.cmd = cmd - docky = ShellCommandTask( + docky = ShellTask( name="docky", environment=Docker(image="busybox"), executable=wf.lzin.cmd, diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index bd05d9daed..be5c7393d3 100644 --- 
a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,7 +1,7 @@ from pathlib import Path from ..environments import Native, Docker, Singularity -from ..task import ShellCommandTask +from ..task import ShellTask from ..submitter import Submitter from ..specs import ( ShellSpec, @@ -25,22 +25,20 @@ def test_native_1(tmp_path): newcache = lambda x: makedir(tmp_path, x) cmd = ["whoami"] - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = Native().execute(shelly) shelly() assert env_res == shelly.output_ - shelly_call = ShellCommandTask( + shelly_call = ShellTask( name="shelly_call", executable=cmd, cache_dir=newcache("shelly_call") ) shelly_call(environment=Native()) assert env_res == shelly_call.output_ - shelly_subm = ShellCommandTask( + shelly_subm = ShellTask( name="shelly_subm", executable=cmd, cache_dir=newcache("shelly_subm") ) with Submitter(plugin="cf") as sub: @@ -56,13 +54,11 @@ def test_docker_1(tmp_path): cmd = ["whoami"] docker = Docker(image="busybox") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = docker.execute(shelly) - shelly_env = ShellCommandTask( + shelly_env = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -71,7 +67,7 @@ def test_docker_1(tmp_path): shelly_env() assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ - shelly_call = ShellCommandTask( + shelly_call = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) shelly_call(environment=docker) @@ -94,13 +90,11 @@ def test_docker_1_subm(tmp_path, docker): cmd = ["whoami"] docker = Docker(image="busybox") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = docker.execute(shelly) - shelly_env = ShellCommandTask( + shelly_env = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -110,7 +104,7 @@ def test_docker_1_subm(tmp_path, docker): shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ - shelly_call = ShellCommandTask( + shelly_call = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) with Submitter(plugin="cf") as sub: @@ -126,13 +120,11 @@ def test_singularity_1(tmp_path): cmd = ["whoami"] sing = Singularity(image="docker://alpine") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = sing.execute(shelly) - shelly_env = ShellCommandTask( + shelly_env = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -141,7 +133,7 @@ def test_singularity_1(tmp_path): shelly_env() assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ - shelly_call = ShellCommandTask( + shelly_call = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) shelly_call(environment=sing) @@ -156,13 +148,11 @@ def test_singularity_1_subm(tmp_path, plugin): cmd = ["whoami"] sing = 
Singularity(image="docker://alpine") - shelly = ShellCommandTask( - name="shelly", executable=cmd, cache_dir=newcache("shelly") - ) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = sing.execute(shelly) - shelly_env = ShellCommandTask( + shelly_env = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -172,7 +162,7 @@ def test_singularity_1_subm(tmp_path, plugin): shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ - shelly_call = ShellCommandTask( + shelly_call = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) with Submitter(plugin=plugin) as sub: @@ -206,7 +196,7 @@ def create_shelly_inputfile(tempdir, filename, name, executable): ) kwargs = {} if filename is None else {"file": filename} - shelly = ShellCommandTask( + shelly = ShellTask( name=name, executable=executable, cache_dir=makedir(tempdir, name), @@ -399,7 +389,7 @@ def create_shelly_outputfile(tempdir, filename, name, executable="cp"): ) kwargs = {} if filename is None else {"file_orig": filename} - shelly = ShellCommandTask( + shelly = ShellTask( name=name, executable=executable, cache_dir=makedir(tempdir, name), diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index ea5dd2afdc..915d183973 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -6,7 +6,7 @@ import pytest from fileformats.generic import File from ..specs import SpecInfo, ShellSpec -from ..task import ShellCommandTask +from ..task import ShellTask from ..helpers_file import ( ensure_list, MountIndentifier, @@ -388,7 +388,7 @@ def test_output_template(tmp_path): bases=(ShellSpec,), ) - class MyCommand(ShellCommandTask): + class MyCommand(ShellTask): executable = "my" input_spec = my_input_spec diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index 8408fddb6c..4dc6f80369 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -2,25 +2,25 @@ import pytest -from ..task import ShellCommandTask -from ..specs import ShellOutSpec, ShellSpec, SpecInfo, File +from ..task import ShellTask +from ..specs import ShellOutputs, ShellSpec, SpecInfo, File interf_input_spec = SpecInfo( name="Input", fields=[("test", ty.Any, {"help_string": "test"})], bases=(ShellSpec,) ) interf_output_spec = SpecInfo( - name="Output", fields=[("test_out", File, "*.txt")], bases=(ShellOutSpec,) + name="Output", fields=[("test_out", File, "*.txt")], bases=(ShellOutputs,) ) -class Interf_1(ShellCommandTask): +class Interf_1(ShellTask): """class with customized input/output specs""" input_spec = interf_input_spec output_spec = interf_output_spec -class Interf_2(ShellCommandTask): +class Interf_2(ShellTask): """class with customized input/output specs and executables""" input_spec = interf_input_spec @@ -28,7 +28,7 @@ class Interf_2(ShellCommandTask): executable = "testing command" -class Interf_3(ShellCommandTask): +class Interf_3(ShellTask): """class with customized input and executables""" input_spec = SpecInfo( @@ -45,7 +45,7 @@ class Interf_3(ShellCommandTask): executable = "testing command" -class TouchInterf(ShellCommandTask): +class TouchInterf(ShellTask): """class with customized input and executables""" input_spec = SpecInfo( @@ -81,7 +81,7 @@ def test_interface_specs_2(): bases=(ShellSpec,), ) my_output_spec = SpecInfo( - name="Output", 
fields=[("my_out", File, "*.txt")], bases=(ShellOutSpec,) + name="Output", fields=[("my_out", File, "*.txt")], bases=(ShellOutputs,) ) task = Interf_1(input_spec=my_input_spec, output_spec=my_output_spec) assert task.input_spec == my_input_spec diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 4857db094f..631f72ff73 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -7,11 +7,11 @@ import re import stat -from ..task import ShellCommandTask +from ..task import ShellTask from ..submitter import Submitter from ..core import Workflow from ..specs import ( - ShellOutSpec, + ShellOutputs, ShellSpec, SpecInfo, File, @@ -31,7 +31,7 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] - shelly = ShellCommandTask(name="shelly", executable=cmd, cache_dir=tmp_path) + shelly = ShellTask(name="shelly", executable=cmd, cache_dir=tmp_path) assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin=plugin_dask_opt) @@ -46,7 +46,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): strip option to remove \n at the end os stdout """ cmd = ["pwd"] - shelly = ShellCommandTask(name="shelly", executable=cmd, strip=True) + shelly = ShellTask(name="shelly", executable=cmd, strip=True) shelly.cache_dir = tmp_path assert shelly.cmdline == " ".join(cmd) @@ -60,7 +60,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] - shelly = ShellCommandTask(name="shelly", executable=cmd) + shelly = ShellTask(name="shelly", executable=cmd) shelly.cache_dir = tmp_path assert shelly.cmdline == " ".join(cmd) @@ -76,7 +76,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) + shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) @@ -93,7 +93,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = "pydra" # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) + shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.cmdline == "echo pydra" @@ -115,7 +115,7 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): cmd = ["pwd", "whoami"] # all args given as executable - shelly = ShellCommandTask(name="shelly").split("executable", executable=cmd) + shelly = ShellTask(name="shelly").split("executable", executable=cmd) shelly.cache_dir = tmp_path # assert shelly.cmdline == ["pwd", "whoami"] @@ -137,7 +137,7 @@ def test_shell_cmd_4(plugin, tmp_path): cmd_exec = "echo" cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec).split( + shelly = ShellTask(name="shelly", executable=cmd_exec).split( splitter="args", args=cmd_args ) shelly.cache_dir = tmp_path @@ -162,7 +162,7 @@ def test_shell_cmd_5(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly", 
executable=cmd_exec) + ShellTask(name="shelly", executable=cmd_exec) .split(splitter="args", args=cmd_args) .combine("args") ) @@ -184,7 +184,7 @@ def test_shell_cmd_6(plugin, tmp_path): cmd_exec = ["echo", ["echo", "-n"]] cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly").split( + shelly = ShellTask(name="shelly").split( splitter=["executable", "args"], executable=cmd_exec, args=cmd_args ) shelly.cache_dir = tmp_path @@ -228,7 +228,7 @@ def test_shell_cmd_7(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly") + ShellTask(name="shelly") .split(splitter=["executable", "args"], executable=cmd_exec, args=cmd_args) .combine("args") ) @@ -254,9 +254,9 @@ def test_wf_shell_cmd_1(plugin, tmp_path): wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) wf.inputs.cmd1 = "pwd" wf.inputs.cmd2 = "ls" - wf.add(ShellCommandTask(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) + wf.add(ShellTask(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) wf.add( - ShellCommandTask( + ShellTask( name="shelly_ls", executable=wf.lzin.cmd2, args=wf.shelly_pwd.lzout.stdout ) ) @@ -299,7 +299,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, args=cmd_args, @@ -347,7 +347,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, args=cmd_args, @@ -388,7 +388,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, text=hello, @@ -421,7 +421,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, text=hello, @@ -459,7 +459,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) shelly.inputs.text = hello @@ -492,7 +492,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -526,7 +526,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -556,7 +556,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -582,7 +582,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -613,7 +613,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): ) # separate command into exec + 
args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -651,7 +651,7 @@ def test_shell_cmd_inputspec_4c_exception(plugin): with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set when the field" ): - ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + ShellTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) def test_shell_cmd_inputspec_4d_exception(plugin): @@ -681,7 +681,7 @@ def test_shell_cmd_inputspec_4d_exception(plugin): with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set together" ) as excinfo: - ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + ShellTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -721,7 +721,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -769,7 +769,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -817,7 +817,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -862,7 +862,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, opt_t=cmd_t, input_spec=my_input_spec ) with pytest.raises(Exception) as excinfo: @@ -905,7 +905,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -945,7 +945,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args=args, @@ -990,7 +990,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args=args, @@ -1037,7 +1037,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1076,7 +1076,7 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args=args, @@ -1134,7 +1134,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1191,7 +1191,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1242,7 +1242,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", 
executable=cmd, input_spec=my_input_spec, @@ -1295,7 +1295,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, file_orig=file ) @@ -1343,7 +1343,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1394,7 +1394,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1446,7 +1446,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1497,7 +1497,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, files=files_list, @@ -1546,7 +1546,7 @@ def test_shell_cmd_inputspec_10_err(tmp_path): ) with pytest.raises(FileNotFoundError): - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec ) @@ -1579,9 +1579,9 @@ def test_shell_cmd_inputspec_11(tmp_path): ] input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) - output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutSpec,)) + output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutputs,)) - task = ShellCommandTask( + task = ShellTask( name="echoMultiple", executable="touch", input_spec=input_spec, @@ -1657,7 +1657,7 @@ def template_function(inputs): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1701,7 +1701,7 @@ def test_shell_cmd_inputspec_with_iterable(): bases=(ShellSpec,), ) - task = ShellCommandTask(name="test", input_spec=input_spec, executable="test") + task = ShellTask(name="test", input_spec=input_spec, executable="test") for iterable_type in (list, tuple): task.inputs.iterable_1 = iterable_type(range(3)) @@ -1751,7 +1751,7 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1813,7 +1813,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1891,7 +1891,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1933,7 +1933,7 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -1968,7 +1968,7 @@ def test_shell_cmd_inputspec_typeval_1(): ) with pytest.raises(TypeError): - ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + ShellTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) def 
test_shell_cmd_inputspec_typeval_2(): @@ -1984,7 +1984,7 @@ def test_shell_cmd_inputspec_typeval_2(): ) with pytest.raises(TypeError): - ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + ShellTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -2006,7 +2006,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -2044,7 +2044,7 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -2090,7 +2090,7 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -2150,7 +2150,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -2203,7 +2203,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly", input_spec=my_input_spec, executable=wf.lzin.cmd, @@ -2250,7 +2250,7 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly", input_spec=my_input_spec, executable=wf.lzin.cmd, @@ -2328,7 +2328,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2336,7 +2336,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ) ) wf.add( - ShellCommandTask( + ShellTask( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2425,7 +2425,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2433,7 +2433,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ) ) wf.add( - ShellCommandTask( + ShellTask( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2520,7 +2520,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2528,7 +2528,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ) ) wf.add( - ShellCommandTask( + ShellTask( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2618,14 +2618,14 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): ) wf.add( - ShellCommandTask( + ShellTask( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, ).split("args", args=wf.lzin.args) ) wf.add( - ShellCommandTask( + ShellTask( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2664,9 +2664,9 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2684,9 +2684,9 @@ def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): my_output_spec = SpecInfo( 
name="Output", fields=[("newfile", attr.ib(type=File, default="newfile_tmp.txt"))], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2703,9 +2703,9 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_tmp_.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2725,9 +2725,9 @@ def test_shell_cmd_outputspec_2(plugin, results_function, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_*.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2745,9 +2745,9 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_*K.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2767,9 +2767,9 @@ def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", MultiOutputFile, "newfile_*.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2801,9 +2801,9 @@ def gather_output(field, output_dir): attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2840,9 +2840,9 @@ def gather_output(executable, output_dir): attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2868,9 +2868,9 @@ def gather_output(executable, output_dir, ble): my_output_spec = SpecInfo( name="Output", fields=[("newfile", attr.ib(type=File, metadata={"callable": gather_output}))], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask(name="shelly", executable=cmd, output_spec=my_output_spec) + shelly = ShellTask(name="shelly", executable=cmd, output_spec=my_output_spec) with pytest.raises(AttributeError, match="ble"): shelly() @@ -2883,7 +2883,7 @@ def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): """ @attr.s(kw_only=True) - class MyOutputSpec(ShellOutSpec): + class MyOutputSpec(ShellOutputs): @staticmethod def gather_output(executable, output_dir): files = executable[1:] @@ -2891,7 +2891,7 @@ def gather_output(executable, output_dir): newfile: MultiOutputFile = attr.ib(metadata={"callable": gather_output}) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"], output_spec=SpecInfo(name="Output", bases=(MyOutputSpec,)), @@ -2928,10 +2928,10 @@ def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): ), ) ], - bases=(ShellOutSpec,), + 
bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args=args, @@ -2961,10 +2961,10 @@ def test_shell_cmd_outputspec_6a(): {"output_file_template": "{args}", "help_string": "output file"}, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args=args, output_spec=my_output_spec ) @@ -3031,10 +3031,10 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3107,10 +3107,10 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3176,10 +3176,10 @@ def get_stderr(stderr): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ).split("args", args=args) @@ -3207,11 +3207,11 @@ def test_shell_cmd_outputspec_8b_error(): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), + ) + shelly = ShellTask(name="shelly", executable=cmd, output_spec=my_output_spec).split( + "args", args=args ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec - ).split("args", args=args) with pytest.raises(Exception) as e: shelly() assert "has to have a callable" in str(e.value) @@ -3243,10 +3243,10 @@ def get_lowest_directory(directory_path): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, @@ -3304,10 +3304,10 @@ def get_lowest_directory(directory_path): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name=cmd, executable=cmd, input_spec=my_input_spec, @@ -3351,10 +3351,10 @@ def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, output_spec=my_output_spec, @@ -3384,13 +3384,9 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), - ) - wf.add( - ShellCommandTask( - name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec - ) + bases=(ShellOutputs,), ) + wf.add(ShellTask(name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec)) wf.set_output( [("stdout", wf.shelly.lzout.stdout), ("newfile", wf.shelly.lzout.newfile)] ) @@ -3441,9 +3437,9 @@ def test_shell_cmd_inputspec_outputspec_1(): {"output_file_template": "{file2}", "help_string": "newfile 2"}, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3495,9 +3491,9 @@ def test_shell_cmd_inputspec_outputspec_1a(): {"output_file_template": "{file2}", "help_string": "newfile 2"}, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3556,9 +3552,9 @@ def 
test_shell_cmd_inputspec_outputspec_2(): }, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3623,9 +3619,9 @@ def test_shell_cmd_inputspec_outputspec_2a(): }, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3695,9 +3691,9 @@ def test_shell_cmd_inputspec_outputspec_3(): }, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3756,9 +3752,9 @@ def test_shell_cmd_inputspec_outputspec_3a(): }, ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3820,9 +3816,9 @@ def test_shell_cmd_inputspec_outputspec_4(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3875,9 +3871,9 @@ def test_shell_cmd_inputspec_outputspec_4a(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3930,9 +3926,9 @@ def test_shell_cmd_inputspec_outputspec_5(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3984,9 +3980,9 @@ def test_shell_cmd_inputspec_outputspec_5a(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4038,9 +4034,9 @@ def test_shell_cmd_inputspec_outputspec_5b(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4087,9 +4083,9 @@ def test_shell_cmd_inputspec_outputspec_6_except(): }, ) ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4338,7 +4334,7 @@ def change_name(file): in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec ) out_file = shelly.output_dir / "test_brain.nii.gz" @@ -4387,7 +4383,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): bases=(ShellSpec,), ) - my_cp = ShellCommandTask( + my_cp = ShellTask( name="my_cp", executable="cp", input_spec=my_cp_spec, @@ -4427,7 +4423,7 @@ def test_shell_cmd_optional_output_file2(tmp_path): bases=(ShellSpec,), ) - my_cp = ShellCommandTask( + my_cp = ShellTask( name="my_cp", executable="cp", input_spec=my_cp_spec, @@ -4488,10 +4484,10 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="echo", input_spec=input_spec, @@ -4550,10 +4546,10 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ 
-4617,10 +4613,10 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4685,10 +4681,10 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4738,10 +4734,10 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="echo", input_spec=input_spec, @@ -4792,10 +4788,10 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): ), ), ], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - shelly = ShellCommandTask( + shelly = ShellTask( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4868,13 +4864,11 @@ def formatter_1(inputs): return f"-t [{inputs['in1']}, {inputs['in2']}]" input_spec = spec_info(formatter_1) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" # testing that the formatter can overwrite a provided value for together. - shelly = ShellCommandTask( + shelly = ShellTask( executable="exec", input_spec=input_spec, in1="i1", @@ -4890,9 +4884,7 @@ def formatter_2(in1, in2): input_spec = spec_info(formatter_2) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" def formatter_3(in1, in3): @@ -4901,9 +4893,7 @@ def formatter_3(in1, in3): input_spec = spec_info(formatter_3) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") with pytest.raises(Exception) as excinfo: shelly.cmdline assert ( @@ -4919,7 +4909,7 @@ def formatter_5(field): input_spec = spec_info(formatter_5) - shelly = ShellCommandTask( + shelly = ShellTask( executable="exec", input_spec=input_spec, in1="i1", @@ -4936,9 +4926,7 @@ def formatter_4(field): input_spec = spec_info(formatter_4) - shelly = ShellCommandTask( - executable="exec", input_spec=input_spec, in1="i1", in2="i2" - ) + shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec" @@ -4990,7 +4978,7 @@ def formatter_1(in1, in2): input_spec = spec_info(formatter_1) in1 = ["in11", "in12"] - shelly = ShellCommandTask( + shelly = ShellTask( name="f", executable="executable", input_spec=input_spec, in2="in2" ).split("in1", in1=in1) assert shelly is not None @@ -5037,7 +5025,7 @@ def test_shellcommand_error_msg(tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="err_msg", executable=str(script_path), input_spec=input_spec, in1="hello" ) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 9bc7f7a232..53071d65c5 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -3,9 +3,9 @@ import attr import pytest -from ..task import ShellCommandTask +from ..task import ShellTask from 
..specs import ( - ShellOutSpec, + ShellOutputs, ShellSpec, SpecInfo, File, @@ -15,14 +15,14 @@ def test_shell_cmd_execargs_1(): # separate command into exec + args - shelly = ShellCommandTask(executable="executable", args="arg") + shelly = ShellTask(executable="executable", args="arg") assert shelly.cmdline == "executable arg" assert shelly.name == "ShellTask_noname" def test_shell_cmd_execargs_2(): # separate command into exec + args - shelly = ShellCommandTask(executable=["cmd_1", "cmd_2"], args="arg") + shelly = ShellTask(executable=["cmd_1", "cmd_2"], args="arg") assert shelly.cmdline == "cmd_1 cmd_2 arg" @@ -42,7 +42,7 @@ def test_shell_cmd_inputs_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", args="arg", inpA="inp1", input_spec=my_input_spec ) assert shelly.cmdline == "executable inp1 arg" @@ -58,7 +58,7 @@ def test_shell_cmd_inputs_1a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", args="arg", inpA="inpNone1", input_spec=my_input_spec ) # inp1 should be the first one after executable @@ -82,7 +82,7 @@ def test_shell_cmd_inputs_1b(): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", args="arg", inpA="inp-1", input_spec=my_input_spec ) # inp1 should be last before arg @@ -105,7 +105,7 @@ def test_shell_cmd_inputs_1_st(): bases=(ShellSpec,), ) - ShellCommandTask( + ShellTask( name="shelly", executable="executable", args="arg", @@ -140,7 +140,7 @@ def test_shell_cmd_inputs_2(): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpB="inp1", inpA="inp2", input_spec=my_input_spec ) assert shelly.cmdline == "executable inp1 inp2" @@ -158,7 +158,7 @@ def test_shell_cmd_inputs_2a(): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA="inpNone1", inpB="inpNone2", @@ -191,7 +191,7 @@ def test_shell_cmd_inputs_2_err(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA="inp1", inpB="inp2", input_spec=my_input_spec ) with pytest.raises(Exception) as e: @@ -224,9 +224,7 @@ def test_shell_cmd_inputs_2_noerr(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA="inp1", input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) shelly.cmdline @@ -255,7 +253,7 @@ def test_shell_cmd_inputs_3(): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA="inp1", inpB="inp-1", @@ -282,9 +280,7 @@ def test_shell_cmd_inputs_argstr_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA="inp1", input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) # flag used before inp1 assert shelly.cmdline == "executable -v inp1" @@ -306,7 +302,7 @@ def test_shell_cmd_inputs_argstr_2(): ) # separate command into exec + args - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", args="arg", inpA=True, input_spec=my_input_spec ) # a flag is used without any additional argument @@ -329,7 +325,7 @@ def test_shell_cmd_inputs_list_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) # multiple elements @@ -352,7 
+348,7 @@ def test_shell_cmd_inputs_list_2(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) assert shelly.cmdline == "executable -v el_1 el_2 el_3" @@ -374,7 +370,7 @@ def test_shell_cmd_inputs_list_3(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) # a flag is repeated @@ -402,7 +398,7 @@ def test_shell_cmd_inputs_list_sep_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -432,7 +428,7 @@ def test_shell_cmd_inputs_list_sep_2(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -462,7 +458,7 @@ def test_shell_cmd_inputs_list_sep_2a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -492,7 +488,7 @@ def test_shell_cmd_inputs_list_sep_3(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -522,7 +518,7 @@ def test_shell_cmd_inputs_list_sep_3a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -552,9 +548,7 @@ def test_shell_cmd_inputs_sep_4(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA=["aaa"], input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA=["aaa"], input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -579,9 +573,7 @@ def test_shell_cmd_inputs_sep_4a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA="aaa", input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -605,9 +597,7 @@ def test_shell_cmd_inputs_format_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA="aaa", input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -631,7 +621,7 @@ def test_shell_cmd_inputs_format_2(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", inpA=["el_1", "el_2"], input_spec=my_input_spec, @@ -659,9 +649,7 @@ def test_shell_cmd_inputs_format_3(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", inpA=0.007, input_spec=my_input_spec - ) + shelly = ShellTask(executable="executable", inpA=0.007, input_spec=my_input_spec) assert shelly.cmdline == "executable -v 0.00700" @@ -686,7 +674,7 @@ def test_shell_cmd_inputs_mandatory_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask(executable="executable", input_spec=my_input_spec) + shelly = ShellTask(executable="executable", input_spec=my_input_spec) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) @@ -729,9 +717,7 @@ def test_shell_cmd_inputs_not_given_1(): ], bases=(ShellSpec,), ) - shelly = ShellCommandTask( - name="shelly", executable="executable", input_spec=my_input_spec - ) + shelly = ShellTask(name="shelly", executable="executable", 
input_spec=my_input_spec) shelly.inputs.arg2 = "argument2" @@ -771,9 +757,7 @@ def test_shell_cmd_inputs_template_1(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -812,9 +796,7 @@ def test_shell_cmd_inputs_template_1a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") # outA has no argstr in metadata, so it's not a part of the command line assert shelly.cmdline == "executable inpA" @@ -848,7 +830,7 @@ def test_shell_cmd_inputs_template_2(): bases=(ShellSpec,), ) - shelly = ShellCommandTask(executable="executable", input_spec=my_input_spec) + shelly = ShellTask(executable="executable", input_spec=my_input_spec) # inpB not in the inputs, so no outB in the command line assert shelly.cmdline == "executable" # checking if outB in the output fields @@ -926,7 +908,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA, inpB=inpB ) # using syntax from the outAB field @@ -1006,7 +988,7 @@ def test_shell_cmd_inputs_template_3a(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", inpB="inpB" ) # using syntax from the outAB field @@ -1082,9 +1064,7 @@ def test_shell_cmd_inputs_template_4(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") # inpB is not provided so outB not in the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1111,9 +1091,7 @@ def test_shell_cmd_inputs_template_5_ex(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, outAB="outAB" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, outAB="outAB") with pytest.raises(Exception) as e: shelly.cmdline assert "read only" in str(e.value) @@ -1158,25 +1136,23 @@ def test_shell_cmd_inputs_template_6(): # no input for outA (and no default value), so the output is created whenever the # template can be formatted (the same way as for templates that has type=str) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # a string is provided for outA, so this should be used as the outA value - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" ) assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, 
inpA="inpA", outA=True ) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False ) assert shelly.cmdline == "executable inpA" @@ -1220,25 +1196,23 @@ def test_shell_cmd_inputs_template_6a(): ) # no input for outA, but default is False, so the outA shouldn't be used - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == "executable inpA" # a string is provided for outA, so this should be used as the outA value - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" ) assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True ) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False ) assert shelly.cmdline == "executable inpA" @@ -1281,7 +1255,7 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1330,7 +1304,7 @@ def test_shell_cmd_inputs_template_7a(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1379,7 +1353,7 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1425,7 +1399,7 @@ def test_shell_cmd_inputs_template_8(tmp_path: Path): inpA_file = tmp_path / "a_file.t" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1486,7 +1460,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpInt=3 ) @@ -1548,7 +1522,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpStr="hola" ) @@ -1613,7 +1587,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): inpFile_file = tmp_path / "inpFile.t" inpFile_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file, @@ -1674,7 +1648,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellCommandTask( + shelly = ShellTask( 
executable="executable", input_spec=my_input_spec, inpA=inpA_file, @@ -1719,9 +1693,7 @@ def test_shell_cmd_inputs_template_10(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA=3.3456 - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA=3.3456) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" @@ -1771,9 +1743,7 @@ def test_shell_cmd_inputs_template_requires_1(): ) # When requirements are not met. - shelly = ShellCommandTask( - executable="cmd", input_spec=my_input_spec, in_file="in.file" - ) + shelly = ShellTask(executable="cmd", input_spec=my_input_spec, in_file="in.file") assert "--tpl" not in shelly.cmdline # When requirements are met. @@ -1821,9 +1791,7 @@ def template_fun(inputs): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA" - ) + shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -1881,7 +1849,7 @@ def template_fun(inputs): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( executable="executable", input_spec=my_input_spec, inpA="inpA", @@ -1927,7 +1895,7 @@ def test_shell_cmd_inputs_template_1_st(): ) inpA = ["inpA_1", "inpA_2"] - ShellCommandTask( + ShellTask( name="f", executable="executable", input_spec=my_input_spec, @@ -2125,13 +2093,13 @@ def test_shell_cmd_inputs_denoise_image( my_input_file.write_text("content") # no input provided - shelly = ShellCommandTask(executable="DenoiseImage", input_spec=my_input_spec) + shelly = ShellTask(executable="DenoiseImage", input_spec=my_input_spec) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) # input file name, noiseImage is not set, so using default value False - shelly = ShellCommandTask( + shelly = ShellTask( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2142,7 +2110,7 @@ def test_shell_cmd_inputs_denoise_image( ) # input file name, noiseImage is set to True, so template is used in the output - shelly = ShellCommandTask( + shelly = ShellTask( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2154,7 +2122,7 @@ def test_shell_cmd_inputs_denoise_image( ) # input file name and help_short - shelly = ShellCommandTask( + shelly = ShellTask( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, @@ -2174,7 +2142,7 @@ def test_shell_cmd_inputs_denoise_image( ] # adding image_dimensionality that has allowed_values [2, 3, 4] - shelly = ShellCommandTask( + shelly = ShellTask( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2187,7 +2155,7 @@ def test_shell_cmd_inputs_denoise_image( # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised with pytest.raises(ValueError) as excinfo: - shelly = ShellCommandTask( + shelly = ShellTask( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2199,7 +2167,7 @@ def test_shell_cmd_inputs_denoise_image( # tests with XOR in input metadata -class SimpleTaskXor(ShellCommandTask): +class SimpleTaskXor(ShellTask): input_fields = [ ( "input_1", @@ -2233,7 +2201,7 @@ class 
SimpleTaskXor(ShellCommandTask): task_input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) task_output_fields = [] task_output_spec = SpecInfo( - name="Output", fields=task_output_fields, bases=(ShellOutSpec,) + name="Output", fields=task_output_fields, bases=(ShellOutputs,) ) input_spec = task_input_spec diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 791575adc1..0af3792444 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -3,10 +3,10 @@ import pytest import attr -from ..task import ShellCommandTask +from ..task import ShellTask from ..submitter import Submitter from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec +from ..specs import ShellOutputs, SpecInfo, File, ShellSpec from ..environments import Singularity @@ -30,7 +30,7 @@ def test_singularity_1_nosubm(tmp_path): """ cmd = "pwd" image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", executable=cmd, environment=Singularity(image=image), @@ -52,7 +52,7 @@ def test_singularity_2_nosubm(tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", executable=cmd, environment=Singularity(image=image), @@ -73,7 +73,7 @@ def test_singularity_2(plugin, tmp_path): cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", executable=cmd, environment=Singularity(image=image), @@ -97,7 +97,7 @@ def test_singularity_2a(plugin, tmp_path): cmd_args = ["hail", "pydra"] # separate command into exec + args image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", executable=cmd_exec, args=cmd_args, @@ -123,7 +123,7 @@ def test_singularity_st_1(plugin, tmp_path): """ cmd = ["pwd", "ls"] image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), cache_dir=tmp_path ).split("executable", executable=cmd) assert singu.state.splitter == "singu.executable" @@ -145,7 +145,7 @@ def test_singularity_st_2(tmp_path, n): """splitter over args (checking bigger splitters if slurm available)""" args_n = list(range(n)) image = "docker://alpine" - singu = ShellCommandTask( + singu = ShellTask( name="singu", executable="echo", environment=Singularity(image=image), @@ -173,9 +173,9 @@ def test_singularity_outputspec_1(plugin, tmp_path): my_output_spec = SpecInfo( name="Output", fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutSpec,), + bases=(ShellOutputs,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -223,7 +223,7 @@ def test_singularity_inputspec_1(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -264,7 +264,7 @@ def test_singularity_inputspec_1a(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -321,7 +321,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -381,7 +381,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu 
= ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -441,7 +441,7 @@ def test_singularity_inputspec_2a(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -498,7 +498,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -554,7 +554,7 @@ def test_singularity_inputspec_state_1(tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -604,7 +604,7 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): bases=(ShellSpec,), ) - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=cmd, @@ -651,7 +651,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): wf.inputs.cmd = cmd wf.inputs.file = filename - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, @@ -706,7 +706,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) wf.inputs.cmd = cmd - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, @@ -764,7 +764,7 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): wf.inputs.cmd = cmd wf.inputs.file = filename - singu = ShellCommandTask( + singu = ShellTask( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 504d9fe71c..c06d9c6e17 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -69,14 +69,14 @@ class InpSpec: def __init__(self): self.fields = [("inp_a", int), ("inp_b", int)] - class OutSpec: + class Outputs: def __init__(self): self.fields = [("out_a", int)] self.name = "tn" self.inputs = self.Input() self.input_spec = InpSpec() - self.output_spec = OutSpec() + self.output_spec = Outputs() self.output_names = ["out_a"] self.state = None diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index d434004e3b..8699eb1711 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -10,7 +10,7 @@ from pydra import mark from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages from ..core import Workflow -from ..task import AuditFlag, ShellCommandTask, argstr_formatting +from ..task import AuditFlag, ShellTask, argstr_formatting from .utils import gen_basic_wf from ..specs import ( MultiInputObj, @@ -67,7 +67,7 @@ def test_checksum(): nn = funaddtwo(a=3) assert ( nn.checksum - == "FunctionTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" + == "PythonTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" ) @@ -105,7 +105,7 @@ def testfunc( help = funky.help(returnhelp=True) assert help == [ - "Help for FunctionTask", + "Help for PythonTask", "Input Parameters:", "- a: int", "- b: float (default: 0.1)", @@ -165,7 +165,7 @@ def testfunc( help = funky.help(returnhelp=True) assert help == [ - "Help for FunctionTask", + "Help for PythonTask", "Input Parameters:", "- a: float", "- _func: bytes", @@ -470,7 +470,7 @@ def testfunc(a, b) -> int: help = 
funky.help(returnhelp=True) assert help == [ - "Help for FunctionTask", + "Help for PythonTask", "Input Parameters:", "- a: _empty", "- b: _empty", @@ -511,7 +511,7 @@ def testfunc(a, b) -> (int, int): help = funky.help(returnhelp=True) assert help == [ - "Help for FunctionTask", + "Help for PythonTask", "Input Parameters:", "- a: _empty", "- b: _empty", @@ -811,7 +811,7 @@ def testfunc(a, b=1): def test_input_spec_func_5(): - """the FunctionTask with input_spec, a input has MultiInputObj type + """the PythonTask with input_spec, a input has MultiInputObj type a single value is provided and should be converted to a list """ @@ -1050,7 +1050,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_shellcommandtask(tmpdir): args = "-l" - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable="ls", args=args, @@ -1138,7 +1138,7 @@ def test_audit_shellcommandtask_file(tmp_path): ], bases=(ShellSpec,), ) - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", in_file=file_in, in_file_2=file_in_2, @@ -1171,7 +1171,7 @@ def test_audit_shellcommandtask_version(tmpdir): ) version_cmd = version_cmd.splitlines()[0] cmd = "less" - shelly = ShellCommandTask( + shelly = ShellTask( name="shelly", executable=cmd, args="test_task.py", @@ -1312,13 +1312,13 @@ def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] # all args given as executable - shelly = ShellCommandTask(name="shelly", executable=cmd) + shelly = ShellTask(name="shelly", executable=cmd) assert shelly.cmdline == " ".join(cmd) res = shelly._run() assert res.output.stdout == " ".join(cmd[1:]) + "\n" # separate command into exec + args - shelly = ShellCommandTask(executable=cmd[0], args=cmd[1:]) + shelly = ShellTask(executable=cmd[0], args=cmd[1:]) assert shelly.inputs.executable == "echo" assert shelly.cmdline == " ".join(cmd) res = shelly._run() diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 3da1398c40..791b6a0123 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -3953,7 +3953,7 @@ def test_workflow_combine2(tmpdir): assert result.output.out_iden == [[1, 4], [1, 8]] -# testing lzout.all to collect all of the results and let FunctionTask deal with it +# testing lzout.all to collect all of the results and let PythonTask deal with it def test_wf_lzoutall_1(plugin, tmpdir): diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index e14f581ec9..190561dea6 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -11,12 +11,12 @@ from .node import Node -OutSpecType = ty.TypeVar("OutputType", bound=Outputs) -WorkflowOutSpecType = ty.TypeVar("OutputType", bound=WorkflowOutputs) +OutputsType = ty.TypeVar("OutputType", bound=Outputs) +WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) @attrs.define(auto_attribs=False) -class Workflow(ty.Generic[WorkflowOutSpecType]): +class Workflow(ty.Generic[WorkflowOutputsType]): """A workflow, constructed from a workflow specification Parameters @@ -30,14 +30,14 @@ class Workflow(ty.Generic[WorkflowOutSpecType]): """ name: str = attrs.field() - inputs: TaskSpec[WorkflowOutSpecType] = attrs.field() - outputs: WorkflowOutSpecType = attrs.field() + inputs: TaskSpec[WorkflowOutputsType] = attrs.field() + outputs: WorkflowOutputsType = attrs.field() _nodes: dict[str, Node] = attrs.field(factory=dict) @classmethod def construct( cls, - spec: TaskSpec[WorkflowOutSpecType], + spec: 
TaskSpec[WorkflowOutputsType], ) -> Self: """Construct a workflow from a specification, caching the constructed worklow""" @@ -120,7 +120,7 @@ def construct( return wf - def add(self, task_spec: TaskSpec[OutSpecType], name=None) -> OutSpecType: + def add(self, task_spec: TaskSpec[OutputsType], name=None) -> OutputsType: """Add a node to the workflow Parameters @@ -140,7 +140,7 @@ def add(self, task_spec: TaskSpec[OutSpecType], name=None) -> OutSpecType: name = type(task_spec).__name__ if name in self._nodes: raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutSpecType](name=name, spec=task_spec, workflow=self) + node = Node[OutputsType](name=name, spec=task_spec, workflow=self) self._nodes[name] = node return node.lzout diff --git a/pydra/mark/tests/test_functions.py b/pydra/mark/tests/test_functions.py index 2383ce0057..e124c518dc 100644 --- a/pydra/mark/tests/test_functions.py +++ b/pydra/mark/tests/test_functions.py @@ -3,14 +3,14 @@ import typing as ty from ..functions import task, annotate -from pydra.engine.task import FunctionTask +from pydra.engine.task import PythonTask def test_task_equivalence(): def add_two(a): return a + 2 - canonical = FunctionTask(add_two, a=3) + canonical = PythonTask(add_two, a=3) decorated1 = task(add_two)(a=3) From f227987216137dbad4b4913024f8a276ef293eac Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 23:06:27 +1100 Subject: [PATCH 057/342] added dataclass_transform decorators to define and outputs --- pydra/design/python.py | 14 ++++++++++++++ pydra/design/shell.py | 14 ++++++++++++++ pydra/design/tests/test_shell.py | 14 +++++++++++--- pydra/design/tests/test_workflow.py | 1 + pydra/design/workflow.py | 14 ++++++++++++++ 5 files changed, 54 insertions(+), 3 deletions(-) diff --git a/pydra/design/python.py b/pydra/design/python.py index 610979910c..abeecf30b2 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -1,5 +1,6 @@ import typing as ty import inspect +from typing_extensions import dataclass_transform import attrs from .base import ( Arg, @@ -79,6 +80,19 @@ class out(Out): pass +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out,), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) def define( wrapped: type | ty.Callable | None = None, /, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c38c333151..26d798c3de 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -8,6 +8,7 @@ from copy import copy import attrs import builtins +from typing_extensions import dataclass_transform from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError @@ -198,6 +199,19 @@ def _validate_path_template(self, attribute, value): ) +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out, outarg), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) def define( wrapped: type | str | None = None, /, diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 3b8e3fe7f3..3774020614 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -6,7 +6,7 @@ import cloudpickle as cp from 
pydra.design import shell from pydra.engine.helpers import list_fields -from pydra.engine.specs import ShellSpec +from pydra.engine.specs import ShellSpec, ShellOutputs from fileformats.generic import File, Directory, FsObject from fileformats import text, image from pydra.utils.typing import MultiInputObj @@ -267,7 +267,8 @@ class Ls(ShellSpec["Ls.Outputs"]): xor=["complete_date"], ) - class Outputs: + @shell.outputs + class Outputs(ShellOutputs): entries: list = shell.out( help_string="list of entries returned by ls command", callable=list_entries, @@ -346,7 +347,14 @@ def test_shell_fields(Ls): ] ) - assert [a.name for a in sorted_fields(Ls.Outputs)] == ["entries"] + assert [a.name for a in sorted_fields(Ls.Outputs)] == sorted( + [ + "entries", + "stdout", + "stderr", + "return_code", + ] + ) def test_shell_pickle_roundtrip(Ls, tmp_path): diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 1480bdbe6f..ad15cc3aa9 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -152,6 +152,7 @@ def constructor(a, b): mul = workflow.add(Mul(a=add.out, b=b)) return mul.out + @workflow.outputs class Outputs(WorkflowOutputs): out: float diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 564ab8e09f..8dfc193c93 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -1,5 +1,6 @@ import typing as ty import inspect +from typing_extensions import dataclass_transform import attrs from .base import ( Arg, @@ -84,6 +85,19 @@ class out(Out): pass +@dataclass_transform( + kw_only_default=True, + field_specifiers=(out,), +) +def outputs(wrapped): + """Decorator to specify the output fields of a shell command is a dataclass-style type""" + return wrapped + + +@dataclass_transform( + kw_only_default=True, + field_specifiers=(arg,), +) def define( wrapped: type | ty.Callable | None = None, /, From 0cf7d35885ac4708cd9f8c4db2ce9cd57b73dc31 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 9 Dec 2024 23:12:12 +1100 Subject: [PATCH 058/342] Added the use of '?' 
to signify optional shell template fields --- pydra/design/shell.py | 9 +++++++-- pydra/design/tests/test_shell.py | 14 +++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 26d798c3de..74c4a894e7 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -461,7 +461,7 @@ def parse_command_line_template( return template, inputs, outputs executable, args_str = parts tokens = re.split(r"\s+", args_str.strip()) - arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+)>" + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+\??)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) @@ -534,12 +534,17 @@ def from_type_str(type_str) -> type: else: field_type = arg # Identify type after ':' symbols + if name.endswith("?"): + name = name[:-1] + optional = True + else: + optional = False if ":" in name: name, type_str = name.split(":") type_ = from_type_str(type_str) else: type_ = generic.FsObject if option is None else str - if option is not None: + if optional: type_ |= None # Make the arguments optional kwds = {"type": type_} # If name contains a '.', treat it as a file template and strip it from the name diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 3774020614..48ad389fcb 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -77,9 +77,9 @@ def test_interface_template_more_complex(): ( "cp " "-R " - "--text-arg " - "--int-arg " - "--tuple-arg " + "--text-arg " + "--int-arg " + "--tuple-arg " ), ) @@ -129,7 +129,7 @@ def test_interface_template_with_overrides(): "cp " "-R " "--text-arg " - "--int-arg " + "--int-arg " "--tuple-arg " ), inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, @@ -162,12 +162,12 @@ def test_interface_template_with_overrides(): help_string=RECURSIVE_HELP, position=2, ), - shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=4), shell.arg( name="tuple_arg", argstr="--tuple-arg", - type=tuple[int, str] | None, + type=tuple[int, str], position=5, ), output, @@ -213,7 +213,7 @@ def test_interface_template_with_type_overrides(): shell.arg( name="tuple_arg", argstr="--tuple-arg", - type=tuple[int, str] | None, + type=tuple[int, str], position=6, ), ] From 4be0945a37a4531526c68b6c19b7a19dc921b451 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 12:16:07 +1100 Subject: [PATCH 059/342] added in "modify|" syntax as discussed with @satra --- pydra/design/shell.py | 35 ++++++++++++++++++++++++++------ pydra/design/tests/test_shell.py | 28 +++++++++++++++++++++++++ pydra/engine/state.py | 5 +++++ pydra/engine/workflow/node.py | 32 ++++++++++++++++++----------- 4 files changed, 82 insertions(+), 18 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 74c4a894e7..a550eb2b16 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -461,7 +461,7 @@ def parse_command_line_template( return template, inputs, outputs executable, args_str = parts tokens = re.split(r"\s+", args_str.strip()) - arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+\??)>" + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) @@ -470,10 +470,8 @@ def parse_command_line_template( arguments = [] option = None - def 
add_arg(name, field_type, kwds, is_option=False): + def add_arg(name, field_type, kwds): """Merge the typing information with an existing field if it exists""" - if is_option and kwds["type"] is not bool: - kwds["type"] |= None if issubclass(field_type, Out): dct = outputs else: @@ -497,7 +495,8 @@ def add_arg(name, field_type, kwds, is_option=False): for k, v in kwds.items(): setattr(field, k, v) dct[name] = field - arguments.append(field) + if issubclass(field_type, Arg): + arguments.append(field) def from_type_str(type_str) -> type: types = [] @@ -528,9 +527,14 @@ def from_type_str(type_str) -> type: for token in tokens: if match := arg_re.match(token): name = match.group(1) + modify = False if name.startswith("out|"): name = name[4:] field_type = outarg + elif name.startswith("modify|"): + name = name[7:] + field_type = arg + modify = True else: field_type = arg # Identify type after ':' symbols @@ -539,6 +543,10 @@ def from_type_str(type_str) -> type: optional = True else: optional = False + kwds = {} + if "=" in name: + name, default = name.split("=") + kwds["default"] = eval(default) if ":" in name: name, type_str = name.split(":") type_ = from_type_str(type_str) @@ -546,7 +554,11 @@ def from_type_str(type_str) -> type: type_ = generic.FsObject if option is None else str if optional: type_ |= None # Make the arguments optional - kwds = {"type": type_} + kwds["type"] = type_ + if modify: + kwds["copy_mode"] = generic.File.CopyMode.copy + # Add field to outputs with the same name as the input + add_arg(name, out, {"type": type_, "callable": _InputPassThrough(name)}) # If name contains a '.', treat it as a file template and strip it from the name if field_type is outarg: path_template = name @@ -566,6 +578,7 @@ def from_type_str(type_str) -> type: kwds["argstr"] = option add_arg(name, field_type, kwds) option = None + elif match := bool_arg_re.match(token): argstr, var = match.groups() add_arg(var, arg, {"type": bool, "argstr": argstr, "default": False}) @@ -626,3 +639,13 @@ def remaining_positions( f"Multiple fields have the overlapping positions: {multiple_positions}" ) return [i for i in range(start, num_args) if i not in positions] + + +@attrs.define +class _InputPassThrough: + """A class that can be used to pass through an input to the output""" + + name: str + + def __call__(self, inputs: ShellSpec) -> ty.Any: + return getattr(inputs, self.name) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 48ad389fcb..0b8a3fa107 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -71,6 +71,34 @@ def test_interface_template_w_types_and_path_template_ext(): SampleInterface.Outputs(out_image=image.Png.mock()) +def test_interface_template_w_modify(): + + SampleInterface = shell.define("trim-png ") + + assert issubclass(SampleInterface, ShellSpec) + assert sorted_fields(SampleInterface) == [ + shell.arg( + name="executable", + default="trim-png", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy + ), + ] + assert sorted_fields(SampleInterface.Outputs) == [ + shell.out( + name="image", + type=image.Png, + callable=shell._InputPassThrough("image"), + ) + ] + SampleInterface(image=image.Png.mock()) + SampleInterface.Outputs(image=image.Png.mock()) + + def test_interface_template_more_complex(): SampleInterface = shell.define( diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 
8fcfd67571..5bed7eb71a 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -111,6 +111,11 @@ def __str__(self): f"and combiner: {self.combiner}" ) + @property + def depth(self): + """Return the number of uncombined splits of the state.""" + return len(self.states_ind) + @property def splitter(self): """Get the splitter of the state.""" diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 307610d7c5..e026e15764 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -81,8 +81,26 @@ def inputs(self) -> Inputs: @property def state(self): + """Initialise the state of the node just after it has been created (i.e. before + it has been split or combined) based on the upstream connections + """ if self._state is not NOT_SET: return self._state + upstream_states = self._upstream_states() + if upstream_states: + state = State( + self.name, + splitter=None, + other_states=upstream_states, + combiner=None, + ) + else: + state = None + self._state = state + return state + + def _upstream_states(self): + """Get the states of the upstream nodes that are connected to this node""" upstream_states = {} for inpt_name, val in self.input_values: if isinstance(val, lazy.LazyOutField) and val.node.state: @@ -97,17 +115,7 @@ def state(self): # if the task already exist in other_state, # additional field name should be added to the list of fields upstream_states[node.name][1].append(inpt_name) - if upstream_states: - state = State( - node.name, - splitter=None, - other_states=upstream_states, - combiner=None, - ) - else: - state = None - self._state = state - return state + return upstream_states @property def input_values(self) -> tuple[tuple[str, ty.Any]]: @@ -248,7 +256,7 @@ def combine( raise Exception("combiner has to be a string or a list") combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) if not_split := [ - c for c in combiner if not any(c in s for s in self.state.splitter) + c for c in combiner if not any(c in s for s in self.state.splitter_rpn) ]: raise ValueError( f"Combiner fields {not_split} for Node {self.name!r} are not in the " From 328e0b1e35f540c050b6a3a69ccbf2e77d70535e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 12:55:04 +1100 Subject: [PATCH 060/342] implemented defaults in shell command template using '=' syntax --- pydra/design/shell.py | 17 +++-- pydra/design/tests/test_shell.py | 122 ++++++++++++++++++++++++++----- 2 files changed, 117 insertions(+), 22 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index a550eb2b16..346ebd2eed 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -538,15 +538,17 @@ def from_type_str(type_str) -> type: else: field_type = arg # Identify type after ':' symbols + kwds = {} if name.endswith("?"): + assert "=" not in name name = name[:-1] optional = True - else: - optional = False - kwds = {} - if "=" in name: + kwds["default"] = None + elif "=" in name: name, default = name.split("=") kwds["default"] = eval(default) + else: + optional = False if ":" in name: name, type_str = name.split(":") type_ = from_type_str(type_str) @@ -581,7 +583,12 @@ def from_type_str(type_str) -> type: elif match := bool_arg_re.match(token): argstr, var = match.groups() - add_arg(var, arg, {"type": bool, "argstr": argstr, "default": False}) + if "=" in var: + var, default = var.split("=") + default = eval(default) + else: + default = False + add_arg(var, arg, {"type": bool, "argstr": argstr, "default": default}) elif match := 
opt_re.match(token): option = token else: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 0b8a3fa107..f81664e6a0 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -131,12 +131,25 @@ def test_interface_template_more_complex(): ), output, shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), - shell.arg(name="text_arg", argstr="--text-arg", type=str | None, position=4), - shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=5), + shell.arg( + name="text_arg", + argstr="--text-arg", + type=str | None, + default=None, + position=4, + ), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + default=None, + position=5, + ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str] | None, + default=None, position=6, ), ] @@ -145,7 +158,7 @@ def test_interface_template_more_complex(): SampleInterface.Outputs(out_dir=Directory.sample()) -def test_interface_template_with_overrides(): +def test_interface_template_with_overrides_and_optionals(): RECURSIVE_HELP = ( "If source_file designates a directory, cp copies the directory and the entire " @@ -154,14 +167,86 @@ def test_interface_template_with_overrides(): SampleInterface = shell.define( ( - "cp " + "cp " "-R " "--text-arg " "--int-arg " "--tuple-arg " ), inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, - outputs={"out_dir": shell.outarg(position=-1)}, + outputs={ + "out_dir": shell.outarg(position=-2), + "out_file": shell.outarg(position=-1), + }, + ) + + assert issubclass(SampleInterface, ShellSpec) + outargs = [ + shell.outarg( + name="out_dir", + type=Directory, + path_template="out_dir", + position=-2, + ), + shell.outarg( + name="out_file", + type=File | None, + default=None, + path_template="out_file", + position=-1, + ), + ] + assert ( + sorted_fields(SampleInterface) + == [ + shell.arg( + name="executable", + default="cp", + type=str | ty.Sequence[str], + position=0, + help_string=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + ), + shell.arg( + name="recursive", + argstr="-R", + type=bool, + default=False, + help_string=RECURSIVE_HELP, + position=2, + ), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + default=None, + position=4, + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str], + position=5, + ), + ] + + outargs + ) + assert sorted_fields(SampleInterface.Outputs) == outargs + + +def test_interface_template_with_defaults(): + + SampleInterface = shell.define( + ( + "cp " + "-R " + "--text-arg " + "--int-arg " + "--tuple-arg " + ), ) assert issubclass(SampleInterface, ShellSpec) @@ -169,7 +254,7 @@ def test_interface_template_with_overrides(): name="out_dir", type=Directory, path_template="out_dir", - position=-1, + position=2, ) assert sorted_fields(SampleInterface) == [ shell.arg( @@ -182,25 +267,23 @@ def test_interface_template_with_overrides(): shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), + output, + shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), shell.arg( - name="recursive", - argstr="-R", - type=bool, - default=False, - help_string=RECURSIVE_HELP, - position=2, + name="text_arg", argstr="--text-arg", type=str, position=4, default="foo" ), - shell.arg(name="text_arg", argstr="--text-arg", 
type=str, position=3), - shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=4), + shell.arg(name="int_arg", argstr="--int-arg", type=int, position=5, default=99), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], - position=5, + default=(1, "bar"), + position=6, ), - output, ] assert sorted_fields(SampleInterface.Outputs) == [output] + SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) + SampleInterface.Outputs(out_dir=Directory.sample()) def test_interface_template_with_type_overrides(): @@ -237,7 +320,12 @@ def test_interface_template_with_type_overrides(): output, shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), - shell.arg(name="int_arg", argstr="--int-arg", type=int | None, position=5), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + position=5, + ), shell.arg( name="tuple_arg", argstr="--tuple-arg", From cf7b331ddc7ac9969e938bb9a4d835b43893fe63 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 14:44:03 +1100 Subject: [PATCH 061/342] cleaned up insertion of special stdout, stderr and return_code outputs form shell commands --- pydra/design/base.py | 12 +++ pydra/design/tests/test_shell.py | 164 +++++++++++++++++++++++++++++-- pydra/engine/specs.py | 14 +-- 3 files changed, 174 insertions(+), 16 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index d50f13c326..feff4a4467 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -505,6 +505,18 @@ def make_outputs_spec( raise ValueError( f"{reserved_names} are reserved and cannot be used for output field names" ) + # Add in any fields in base classes that haven't already been converted into attrs + # fields (e.g. 
stdout, stderr and return_code) + for base in outputs_bases: + base_outputs = { + n: o + for n, o in base.__dict__.items() + if isinstance(o, Out) and n not in outputs + } + for name, field in base_outputs.items(): + field.name = name + field.type = base.__annotations__.get(name, ty.Any) + outputs.update(base_outputs) outputs_klass = type( spec_name + "Outputs", tuple(outputs_bases), diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index f81664e6a0..1cbae39be8 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -6,7 +6,13 @@ import cloudpickle as cp from pydra.design import shell from pydra.engine.helpers import list_fields -from pydra.engine.specs import ShellSpec, ShellOutputs +from pydra.engine.specs import ( + ShellSpec, + ShellOutputs, + RETURN_CODE_HELP, + STDOUT_HELP, + STDERR_HELP, +) from fileformats.generic import File, Directory, FsObject from fileformats import text, image from pydra.utils.typing import MultiInputObj @@ -34,7 +40,24 @@ def test_interface_template(): shell.arg(name="in_path", type=FsObject, position=1), output, ] - assert sorted_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] intf = SampleInterface(in_path=File.mock("in-path.txt")) assert intf.executable == "cp" SampleInterface(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) @@ -65,7 +88,24 @@ def test_interface_template_w_types_and_path_template_ext(): shell.arg(name="in_image", type=image.Png, position=1), output, ] - assert sorted_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] SampleInterface(in_image=image.Png.mock()) SampleInterface(in_image=image.Png.mock(), out_image=Path("./new_image.png")) SampleInterface.Outputs(out_image=image.Png.mock()) @@ -93,7 +133,22 @@ def test_interface_template_w_modify(): name="image", type=image.Png, callable=shell._InputPassThrough("image"), - ) + ), + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), ] SampleInterface(image=image.Png.mock()) SampleInterface.Outputs(image=image.Png.mock()) @@ -153,7 +208,24 @@ def test_interface_template_more_complex(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) SampleInterface.Outputs(out_dir=Directory.sample()) @@ -234,7 +306,23 @@ def test_interface_template_with_overrides_and_optionals(): ] + outargs ) - assert sorted_fields(SampleInterface.Outputs) == outargs + assert 
sorted_fields(SampleInterface.Outputs) == outargs + [ + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] def test_interface_template_with_defaults(): @@ -281,7 +369,24 @@ def test_interface_template_with_defaults(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) SampleInterface.Outputs(out_dir=Directory.sample()) @@ -333,7 +438,24 @@ def test_interface_template_with_type_overrides(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [output] + assert sorted_fields(SampleInterface.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] @pytest.fixture(params=["static", "dynamic"]) @@ -582,7 +704,12 @@ class Outputs: ) assert sorted([a.name for a in attrs.fields(A)]) == ["executable", "x", "y"] - assert [a.name for a in attrs.fields(A.Outputs)] == ["y"] + assert sorted(a.name for a in attrs.fields(A.Outputs)) == [ + "return_code", + "stderr", + "stdout", + "y", + ] output = shell.outarg( name="y", type=File, @@ -609,7 +736,24 @@ class Outputs: ), output, ] - assert sorted_fields(A.Outputs) == [output] + assert sorted_fields(A.Outputs) == [ + output, + shell.out( + name="return_code", + type=int, + help_string=RETURN_CODE_HELP, + ), + shell.out( + name="stderr", + type=str, + help_string=STDERR_HELP, + ), + shell.out( + name="stdout", + type=str, + help_string=STDOUT_HELP, + ), + ] def test_shell_output_field_name_dynamic(): diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 4960c88c82..a643f0fef2 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -335,15 +335,17 @@ class WorkflowSpec(TaskSpec[WorkflowOutputsType]): pass +RETURN_CODE_HELP = """The process' exit code.""" +STDOUT_HELP = """The standard output stream produced by the command.""" +STDERR_HELP = """The standard error stream produced by the command.""" + + class ShellOutputs(Outputs): """Output specification of a generic shell process.""" - return_code: int = shell.out() - """The process' exit code.""" - stdout: str = shell.out() - """The process' standard output.""" - stderr: str = shell.out() - """The process' standard input.""" + return_code: int = shell.out(help_string=RETURN_CODE_HELP) + stdout: str = shell.out(help_string=STDOUT_HELP) + stderr: str = shell.out(help_string=STDERR_HELP) @classmethod def collect_outputs( From c6b7cd70664dfaf4ab1190e6ae5dd7de19930651 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 16:20:28 +1100 Subject: [PATCH 062/342] debugged setting of state in split and combine --- pydra/design/tests/test_workflow.py | 9 +- pydra/engine/state.py | 25 ++++- pydra/engine/workflow/node.py | 157 ++++++++++------------------ 3 files changed, 82 insertions(+), 109 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 
ad15cc3aa9..0b0a47eb63 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -369,9 +369,12 @@ def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: wf = Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0], c=2.0)) assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] - assert wf["Mul"].combiner == ["Mul.x"] - assert wf["Add"].lzout.out.splits == frozenset(["Mul.x"]) - assert wf.outputs.out == LazyOutField(node=wf["Sum"], field="out", type=list[float]) + assert wf["Mul"].combiner == [] + assert wf["Add"].splitter == "_Mul" + assert wf["Add"].combiner == ["Mul.x"] + assert wf.outputs.out == LazyOutField( + node=wf["Sum"], field="out", type=list[float], type_checked=True + ) def test_workflow_split_after_access_fail(): diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 5bed7eb71a..fb26767e5e 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -112,9 +112,28 @@ def __str__(self): ) @property - def depth(self): - """Return the number of uncombined splits of the state.""" - return len(self.states_ind) + def depth(self) -> int: + """Return the number of uncombined splits of the state, i.e. the number nested + state arrays to wrap around the type of lazy out fields + + Returns + ------- + int + number of uncombined splits + """ + depth = 0 + stack = [] + for spl in self.splitter_rpn: + if spl in [".", "*"]: + if spl == ".": + depth += int(all(s not in self.combiner for s in stack)) + else: + assert spl == "*" + depth += len([s for s in stack if s not in self.combiner]) + stack = [] + else: + stack.append(spl) + return depth + len(stack) @property def splitter(self): diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index e026e15764..36efa95af1 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -5,7 +5,7 @@ from pydra.utils.typing import TypeParser, StateArray from . import lazy from ..specs import TaskSpec, Outputs -from ..helpers import ensure_list, attrs_values +from ..helpers import ensure_list, attrs_values, is_lazy from .. 
import helpers_state as hlpst from ..state import State @@ -62,17 +62,16 @@ def __getattr__(self, name: str) -> ty.Any: return getattr(self._node._spec, name) def __setattr__(self, name: str, value: ty.Any) -> None: - if isinstance(value, lazy.LazyField): - # Save the current state for comparison later - prev_state = self._node.state - if value.node.state: - # Reset the state to allow the lazy field to be set - self._node._state = NOT_SET - setattr(self._node._spec, name, value) - if value.node.state and self._node.state != prev_state: + setattr(self._node._spec, name, value) + if is_lazy(value): + upstream_states = self._node._get_upstream_states() + if ( + not self._node._state + or self._node._state.other_states != upstream_states + ): self._node._check_if_outputs_have_been_used( f"cannot set {name!r} input to {value} because it changes the " - f"state of the node from {prev_state} to {value.node.state}" + f"state" ) @property @@ -86,36 +85,8 @@ def state(self): """ if self._state is not NOT_SET: return self._state - upstream_states = self._upstream_states() - if upstream_states: - state = State( - self.name, - splitter=None, - other_states=upstream_states, - combiner=None, - ) - else: - state = None - self._state = state - return state - - def _upstream_states(self): - """Get the states of the upstream nodes that are connected to this node""" - upstream_states = {} - for inpt_name, val in self.input_values: - if isinstance(val, lazy.LazyOutField) and val.node.state: - node: Node = val.node - # variables that are part of inner splitters should be treated as a containers - if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: - node._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 - # adding task_name: (task.state, [a field from the connection] - if node.name not in upstream_states: - upstream_states[node.name] = (node.state, [inpt_name]) - else: - # if the task already exist in other_state, - # additional field name should be added to the list of fields - upstream_states[node.name][1].append(inpt_name) - return upstream_states + self._set_state(other_states=self._get_upstream_states()) + return self._state @property def input_values(self) -> tuple[tuple[str, ty.Any]]: @@ -222,8 +193,7 @@ def split( new_inputs[inpt_name] = new_val # Update the inputs with the new split values self._spec = attrs.evolve(self._spec, **new_inputs) - if not self._state or splitter != self._state.splitter: - self._set_state(splitter) + self._set_state(splitter=splitter) # Wrap types of lazy outputs in StateArray types self._wrap_lzout_types_in_state_arrays() return self @@ -272,39 +242,10 @@ def combine( "combiner has been already set, " "if you want to overwrite it - use overwrite=True" ) - if not self._state: - self.split(splitter=None) - # a task can have a combiner without a splitter - # if is connected to one with a splitter; - # self.fut_combiner will be used later as a combiner - self._state.fut_combiner = ( - combiner # QUESTION: why separate combiner and fut_combiner? - ) - else: # self.state and not self.state.combiner - self._set_state(splitter=self._state.splitter, combiner=combiner) + self._set_state(combiner=combiner) self._wrap_lzout_types_in_state_arrays() return self - def _set_state(self, splitter, combiner=None): - """ - Set a particular state on this task. - - Parameters - ---------- - splitter : str | list[str] | tuple[str] - the fields which to split over. If splitting over multiple fields, lists of - fields are interpreted as outer-products and tuples inner-products. 
If None, - then the fields to split are taken from the keyword-arg names. - combiner : list[str] | str, optional - the field or list of inputs to be combined (i.e. not left split) after the - task has been run - """ - if splitter is not None: - self._state = State(name=self.name, splitter=splitter, combiner=combiner) - else: - self._state = None - return self._state - @property def cont_dim(self): # adding inner_cont_dim to the general container_dimension provided by the users @@ -353,40 +294,50 @@ def _wrap_lzout_types_in_state_arrays(self) -> None: if not self.state: return outpt_lf: lazy.LazyOutField - remaining_splits = [] - for split in self.state.splitter: - if isinstance(split, str): - if split not in self.state.combiner: - remaining_splits.append(split) - elif all(s not in self.state.combiner for s in split): - remaining_splits.append(split) - state_depth = len(remaining_splits) for outpt_lf in attrs_values(self.lzout).values(): assert not outpt_lf.type_checked type_, _ = TypeParser.strip_splits(outpt_lf.type) - for _ in range(state_depth): + for _ in range(self._state.depth): type_ = StateArray[type_] outpt_lf.type = type_ - # @classmethod - # def _normalize_splitter( - # cls, splitter: Splitter, strip_previous: bool = True - # ) -> ty.Tuple[ty.Tuple[str, ...], ...]: - # """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] form - # used in LazyFields""" - # if isinstance(splitter, str): - # splitter = (splitter,) - # if isinstance(splitter, tuple): - # splitter = (splitter,) # type: ignore - # else: - # assert isinstance(splitter, list) - # # convert to frozenset to differentiate from tuple, yet still be hashable - # # (NB: order of fields in list splitters aren't relevant) - # splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) - # # Strip out fields starting with "_" designating splits in upstream nodes - # if strip_previous: - # stripped = tuple( - # tuple(f for f in i if not f.startswith("_")) for i in splitter - # ) - # splitter = tuple(s for s in stripped if s) # type: ignore - # return splitter # type: ignore + def _set_state( + self, + splitter: list[str] | tuple[str, ...] 
| None = None, + combiner: list[str] | None = None, + other_states: dict[str, tuple["State", list[str]]] | None = None, + ) -> None: + if self._state not in (NOT_SET, None): + if splitter is None: + splitter = self._state.current_splitter + if combiner is None: + combiner = self._state.current_combiner + if other_states is None: + other_states = self._state.other_states + if not (splitter or combiner or other_states): + self._state = None + else: + self._state = State( + self.name, + splitter=splitter, + other_states=other_states, + combiner=combiner, + ) + + def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: + """Get the states of the upstream nodes that are connected to this node""" + upstream_states = {} + for inpt_name, val in self.input_values: + if isinstance(val, lazy.LazyOutField) and val.node.state: + node: Node = val.node + # variables that are part of inner splitters should be treated as a containers + if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: + node._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 + # adding task_name: (task.state, [a field from the connection] + if node.name not in upstream_states: + upstream_states[node.name] = (node.state, [inpt_name]) + else: + # if the task already exist in other_state, + # additional field name should be added to the list of fields + upstream_states[node.name][1].append(inpt_name) + return upstream_states From 0b8ac665844b0ad2a048967b5086ac5b78bffbb1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 20:28:02 +1100 Subject: [PATCH 063/342] added tests to demonstrate nested workflows --- pydra/design/tests/test_workflow.py | 82 +++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 0b0a47eb63..34cd564770 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,4 +1,5 @@ from operator import attrgetter +from copy import copy import pytest import attrs from pydra.engine.workflow.base import Workflow @@ -401,3 +402,84 @@ def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: with pytest.raises(RuntimeError, match="Outputs .* have already been accessed"): Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) + + +def test_nested_workflow(): + """Simple test of a nested workflow""" + + @python.define + def Add(x: float, y: float) -> float: + return x + y + + @python.define + def Mul(x: float, y: float) -> float: + return x * y + + @python.define + def Divide(x: float, y: float) -> float: + return x / y + + @python.define + def Power(x: float, y: float) -> float: + return x**y + + @workflow.define + def NestedWorkflow(a: float, b: float, c: float) -> float: + pow = workflow.add(Power(x=a, y=c)) + add = workflow.add(Add(x=pow.out, y=b)) + return add.out + + @workflow.define + def MyTestWorkflow(a: int, b: float, c: float) -> float: + div = workflow.add(Divide(x=a, y=b)) + nested = workflow.add(NestedWorkflow(a=div.out, b=b, c=c)) + return nested.out + + wf = Workflow.construct(MyTestWorkflow(a=1, b=10.0, c=2.0)) + assert wf.inputs.a == 1 + assert wf.inputs.b == 10.0 + assert wf.inputs.c == 2.0 + assert wf.outputs.out == LazyOutField( + node=wf["NestedWorkflow"], field="out", type=float, type_checked=True + ) + assert list(wf.node_names) == ["Divide", "NestedWorkflow"] + nwf_spec = copy(wf["NestedWorkflow"]._spec) + nwf_spec.a = 100.0 + nwf = Workflow.construct(nwf_spec) + nwf.inputs.a == 100.0 + nwf.inputs.b == 10.0 + 
nwf.inputs.c == 2.0 + nwf.outputs.out == LazyOutField(node=nwf["Add"], field="out", type=float) + assert list(nwf.node_names) == ["Power", "Add"] + + +def test_recursively_nested_conditional_workflow(): + """More complex nested workflow example demonstrating conditional branching at run + time""" + + @python.define + def Add(x: float, y: float) -> float: + return x + y + + @python.define + def Subtract(x: float, y: float) -> float: + return x - y + + @workflow.define + def RecursiveNestedWorkflow(a: float, depth: int) -> float: + add = workflow.add(Add(x=a, y=1)) + decrement_depth = workflow.add(Subtract(x=depth, y=1)) + if depth > 0: + out_node = workflow.add( + RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out) + ) + else: + out_node = add + return out_node.out + + wf = Workflow.construct(RecursiveNestedWorkflow(a=1, depth=3)) + assert wf.inputs.a == 1 + assert wf.inputs.depth == 3 + assert wf.outputs.out == LazyOutField( + node=wf["RecursiveNestedWorkflow"], field="out", type=float, type_checked=True + ) From 000140e323bcac09979c35b9b52889191e2b1427 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 10 Dec 2024 20:28:36 +1100 Subject: [PATCH 064/342] added check for lazy vals to non-lazy inputs in workflow construct --- pydra/engine/workflow/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 190561dea6..b3aa001714 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -3,7 +3,7 @@ from operator import itemgetter from typing_extensions import Self import attrs -from pydra.engine.helpers import list_fields, attrs_values +from pydra.engine.helpers import list_fields, attrs_values, is_lazy from pydra.engine.specs import TaskSpec, Outputs, WorkflowOutputs from .lazy import LazyInField from pydra.utils.hash import hash_function @@ -53,6 +53,10 @@ def construct( key=itemgetter(0), ) ) + if lazy_non_lazy_vals := [f for f in non_lazy_vals if is_lazy(f[1])]: + raise ValueError( + f"Lazy input fields {lazy_non_lazy_vals} found in non-lazy fields " + ) hash_key = hash_function(non_lazy_vals) if hash_key in cls._constructed: return cls._constructed[hash_key] From 5e898af7709dfdbb521b927e9873199e850fc6bc Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 13 Dec 2024 16:28:27 +1100 Subject: [PATCH 065/342] shell tasks now execute --- pydra/design/base.py | 28 +- pydra/design/shell.py | 1 + pydra/design/tests/test_shell.py | 19 +- pydra/design/tests/test_workflow.py | 8 +- pydra/engine/boutiques.py | 10 +- pydra/engine/core.py | 234 +++-------- pydra/engine/environments.py | 13 +- pydra/engine/specs.py | 384 ++++++++++++++++-- pydra/engine/task.py | 301 +------------- pydra/engine/tests/test_boutiques.py | 12 +- pydra/engine/tests/test_dockertask.py | 4 +- pydra/engine/tests/test_helpers_file.py | 6 +- pydra/engine/tests/test_nipype1_convert.py | 6 +- pydra/engine/tests/test_shelltask.py | 88 ++-- .../engine/tests/test_shelltask_inputspec.py | 38 +- pydra/engine/tests/test_task.py | 4 +- pydra/engine/workflow/node.py | 97 ++++- pydra/utils/typing.py | 11 + 18 files changed, 660 insertions(+), 604 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index feff4a4467..61394e7a68 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -9,7 +9,7 @@ import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File -from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union +from 
pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union, is_type from pydra.engine.helpers import ( from_list_if_single, ensure_list, @@ -52,11 +52,6 @@ def __bool__(self): EMPTY = _Empty.EMPTY # To provide a blank placeholder for the default field -def is_type(_, __, val: ty.Any) -> bool: - """check that the value is a type or generic""" - return inspect.isclass(val) or ty.get_origin(val) - - def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: """Ensure the default value has been coerced into the correct type""" if value is EMPTY: @@ -400,6 +395,10 @@ def make_task_spec( if name is None and klass is not None: name = klass.__name__ + if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: + raise ValueError( + f"{reserved_names} are reserved and cannot be used for {spec_type} field names" + ) outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) if klass is None or not issubclass(klass, spec_type): if name is None: @@ -503,7 +502,7 @@ def make_outputs_spec( outputs_bases = bases + (spec_type,) if reserved_names := [n for n in outputs if n in spec_type.RESERVED_FIELD_NAMES]: raise ValueError( - f"{reserved_names} are reserved and cannot be used for output field names" + f"{reserved_names} are reserved and cannot be used for {spec_type} field names" ) # Add in any fields in base classes that haven't already been converted into attrs # fields (e.g. stdout, stderr and return_code) @@ -585,12 +584,25 @@ def ensure_field_objects( arg.name = input_name if not arg.help_string: arg.help_string = input_helps.get(input_name, "") - else: + elif is_type(arg): inputs[input_name] = arg_type( type=arg, name=input_name, help_string=input_helps.get(input_name, ""), ) + elif isinstance(arg, dict): + arg_kwds = copy(arg) + if "help_string" not in arg_kwds: + arg_kwds["help_string"] = input_helps.get(input_name, "") + inputs[input_name] = arg_type( + name=input_name, + **arg_kwds, + ) + else: + raise ValueError( + f"Input {input_name} must be an instance of {Arg}, a type, or a dictionary " + f" of keyword arguments to pass to {Arg}, not {arg}" + ) for output_name, out in list(outputs.items()): if isinstance(out, Out): diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 346ebd2eed..2410a410ae 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -343,6 +343,7 @@ def make( argstr="", position=0, default=executable, + validator=attrs.validators.min_len(1), help_string=EXECUTABLE_HELP_STRING, ) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 1cbae39be8..e25d2c7a5d 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -32,6 +32,7 @@ def test_interface_template(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], position=0, @@ -80,6 +81,7 @@ def test_interface_template_w_types_and_path_template_ext(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], position=0, @@ -119,6 +121,7 @@ def test_interface_template_w_modify(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], position=0, @@ -176,6 +179,7 @@ def test_interface_template_more_complex(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + 
validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], position=0, @@ -273,6 +277,7 @@ def test_interface_template_with_overrides_and_optionals(): == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], position=0, @@ -347,6 +352,7 @@ def test_interface_template_with_defaults(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], position=0, @@ -414,6 +420,7 @@ def test_interface_template_with_type_overrides(): assert sorted_fields(SampleInterface) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], position=0, @@ -545,6 +552,7 @@ class Outputs(ShellOutputs): type=bool, help_string="Show complete date in long format", argstr="-T", + default=False, requires=["long_format"], xor=["date_format_str"], ), @@ -606,7 +614,7 @@ def test_shell_pickle_roundtrip(Ls, tmp_path): assert RereadLs is Ls -@pytest.mark.xfail(reason="Still need to update tasks to use new shell interface") +# @pytest.mark.xfail(reason="Still need to update tasks to use new shell interface") def test_shell_run(Ls, tmp_path): Path.touch(tmp_path / "a") Path.touch(tmp_path / "b") @@ -615,16 +623,16 @@ def test_shell_run(Ls, tmp_path): ls = Ls(directory=tmp_path, long_format=True) # Test cmdline - assert ls.inputs.directory == tmp_path - assert not ls.inputs.hidden - assert ls.inputs.long_format + assert ls.directory == Directory(tmp_path) + assert not ls.hidden + assert ls.long_format assert ls.cmdline == f"ls -l {tmp_path}" # Drop Long format flag to make output simpler ls = Ls(directory=tmp_path) result = ls() - assert result.output.entries == ["a", "b", "c"] + assert sorted(result.output.entries) == ["a", "b", "c"] @pytest.fixture(params=["static", "dynamic"]) @@ -721,6 +729,7 @@ class Outputs: assert sorted_fields(A) == [ shell.arg( name="executable", + validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], argstr="", diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 34cd564770..d6b11ab565 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -93,14 +93,8 @@ def MyTestShellWorkflow( ) output_video = workflow.add( shell.define( - "HandBrakeCLI -i -o " + "HandBrakeCLI -i -o " "--width --height ", - # By default any input/output specified with a flag (e.g. -i ) - # is considered optional, i.e. of type `FsObject | None`, and therefore - # won't be used by default. By overriding this with non-optional types, - # the fields are specified as being required. 
- inputs={"in_video": video.Mp4}, - outputs={"out_video": video.Mp4}, )(in_video=add_watermark.out_video, width=1280, height=720), name="resize", ).out_video diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py index 8d7782b3e5..d12d30f1d4 100644 --- a/pydra/engine/boutiques.py +++ b/pydra/engine/boutiques.py @@ -182,9 +182,9 @@ def _command_args_single(self, state_ind=None, index=None): """Get command line arguments for a single state""" input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) cmd_list = ( - self.inputs.executable + self.spec.executable + [str(self.bosh_file), input_filepath] - + self.inputs.args + + self.spec.args + self.bindings ) return cmd_list @@ -192,11 +192,11 @@ def _command_args_single(self, state_ind=None, index=None): def _bosh_invocation_file(self, state_ind=None, index=None): """creating bosh invocation file - json file with inputs values""" input_json = {} - for f in attrs_fields(self.inputs, exclude_names=("executable", "args")): + for f in attrs_fields(self.spec, exclude_names=("executable", "args")): if self.state and f"{self.name}.{f.name}" in state_ind: - value = getattr(self.inputs, f.name)[state_ind[f"{self.name}.{f.name}"]] + value = getattr(self.spec, f.name)[state_ind[f"{self.name}.{f.name}"]] else: - value = getattr(self.inputs, f.name) + value = getattr(self.spec, f.name) # adding to the json file if specified by the user if value is not attr.NOTHING and value != "NOTHING": if is_local_file(f): diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 18631d38ea..f74264dfb8 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -7,7 +7,7 @@ import sys from pathlib import Path import typing as ty -from copy import deepcopy, copy +from copy import deepcopy from uuid import uuid4 from filelock import SoftFileLock import shutil @@ -22,6 +22,7 @@ RuntimeSpec, Result, TaskHook, + TaskSpec, ) from .helpers import ( create_checksum, @@ -76,6 +77,9 @@ class Task: _cache_dir = None # Working directory in which to operate _references = None # List of references for a task + name: str + spec: TaskSpec + def __init__( self, spec, @@ -131,23 +135,12 @@ def __init__( if Task._etelemetry_version_data is None: Task._etelemetry_version_data = check_latest_version() - self.interface = spec - # raise error if name is same as of attributes + self.spec = spec self.name = name - if not self.input_spec: - raise Exception("No input_spec in class: %s" % self.__class__.__name__) - - self.inputs = self.interface( - **{ - # in attrs names that starts with "_" could be set when name provided w/o "_" - (f.name[1:] if f.name.startswith("_") else f.name): f.default - for f in attr.fields(type(self.interface)) - } - ) self.input_names = [ field.name - for field in attr.fields(type(self.interface)) + for field in attr.fields(type(self.spec)) if field.name not in ["_func", "_graph_checksums"] ] @@ -164,17 +157,11 @@ def __init__( raise ValueError(f"Unknown input set {inputs!r}") inputs = self._input_sets[inputs] - self.inputs = attr.evolve(self.inputs, **inputs) + self.spec = attr.evolve(self.spec, **inputs) # checking if metadata is set properly - self.inputs.check_metadata() - # dictionary to save the connections with lazy fields - self.inp_lf = {} - self.state = None - # container dimensions provided by the user - self.cont_dim = cont_dim - # container dimension for inner input if needed (e.g. 
for inner splitter) - self._inner_cont_dim = {} + self.spec._check_resolved() + self.spec._check_rules() self._output = {} self._result = {} # flag that says if node finished all jobs @@ -206,18 +193,11 @@ def __str__(self): def __getstate__(self): state = self.__dict__.copy() - state["interface"] = cp.dumps(state["interface"]) - inputs = {} - for k, v in attr.asdict(state["inputs"], recurse=False).items(): - if k.startswith("_"): - k = k[1:] - inputs[k] = v - state["inputs"] = inputs + state["spec"] = cp.dumps(state["spec"]) return state def __setstate__(self, state): - state["interface"] = cp.loads(state["interface"]) - state["inputs"] = self.interface(**state["inputs"]) + state["spec"] = cp.loads(state["spec"]) self.__dict__.update(state) def help(self, returnhelp=False): @@ -243,63 +223,10 @@ def checksum(self): and to create nodes checksums needed for graph checksums (before the tasks have inputs etc.) """ - input_hash = self.inputs.hash - if self.state is None: - self._checksum = create_checksum(self.__class__.__name__, input_hash) - else: - splitter_hash = hash_function(self.state.splitter) - self._checksum = create_checksum( - self.__class__.__name__, hash_function([input_hash, splitter_hash]) - ) + input_hash = self.spec._hash + self._checksum = create_checksum(self.__class__.__name__, input_hash) return self._checksum - def checksum_states(self, state_index=None): - """ - Calculate a checksum for the specific state or all of the states of the task. - Replaces state-arrays in the inputs fields with a specific values for states. - Used to recreate names of the task directories, - - Parameters - ---------- - state_index : - TODO - - """ - if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = { - nd.name: nd.checksum for nd in self.graph_sorted - } - - if state_index is not None: - inputs_copy = copy(self.inputs) - for key, ind in self.state.inputs_ind[state_index].items(): - val = self._extract_input_el( - inputs=self.inputs, inp_nm=key.split(".")[1], ind=ind - ) - setattr(inputs_copy, key.split(".")[1], val) - # setting files_hash again in case it was cleaned by setting specific element - # that might be important for outer splitter of input variable with big files - # the file can be changed with every single index even if there are only two files - input_hash = inputs_copy.hash - if is_workflow(self): - con_hash = hash_function(self._connections) - # TODO: hash list is not used - hash_list = [input_hash, con_hash] # noqa: F841 - checksum_ind = create_checksum( - self.__class__.__name__, self._checksum_wf(input_hash) - ) - else: - checksum_ind = create_checksum(self.__class__.__name__, input_hash) - return checksum_ind - else: - checksum_list = [] - if not hasattr(self.state, "inputs_ind"): - self.state.prepare_states(self.inputs, cont_dim=self.cont_dim) - self.state.prepare_inputs() - for ind in range(len(self.state.inputs_ind)): - checksum_list.append(self.checksum_states(state_index=ind)) - return checksum_list - @property def uid(self): """the unique id number for the task @@ -333,7 +260,7 @@ def output_names(self): """Get the names of the outputs from the task's output_spec (not everything has to be generated, see generated_output_names). """ - return [f.name for f in attr.fields(self.interface.Outputs)] + return [f.name for f in attr.fields(self.spec.Outputs)] @property def generated_output_names(self): @@ -342,13 +269,13 @@ def generated_output_names(self): it uses output_names. 
The results depends on the input provided to the task """ - output_klass = self.interface.Outputs + output_klass = self.spec.Outputs if hasattr(output_klass, "_generated_output_names"): output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) # using updated input (after filing the templates) - _inputs = deepcopy(self.inputs) + _inputs = deepcopy(self.spec) modified_inputs = template_update(_inputs, self.output_dir) if modified_inputs: _inputs = attr.evolve(_inputs, **modified_inputs) @@ -397,8 +324,6 @@ def cache_locations(self, locations): @property def output_dir(self): """Get the filesystem path where outputs will be written.""" - if self.state: - return [self._cache_dir / checksum for checksum in self.checksum_states()] return self._cache_dir / self.checksum @property @@ -434,7 +359,7 @@ def __call__( pass # if there is plugin provided or the task is a Workflow or has a state, # the submitter will be created using provided plugin, self.plugin or "cf" - elif plugin or self.state or is_workflow(self): + elif plugin: plugin = plugin or self.plugin or "cf" if plugin_kwargs is None: plugin_kwargs = {} @@ -442,7 +367,7 @@ def __call__( if submitter: with submitter as sub: - self.inputs = attr.evolve(self.inputs, **kwargs) + self.spec = attr.evolve(self.spec, **kwargs) res = sub(self, environment=environment) else: # tasks without state could be run without a submitter res = self._run(rerun=rerun, environment=environment, **kwargs) @@ -462,10 +387,10 @@ def _modify_inputs(self): from pydra.utils.typing import TypeParser orig_inputs = { - k: v for k, v in attrs_values(self.inputs).items() if not k.startswith("_") + k: v for k, v in attrs_values(self.spec).items() if not k.startswith("_") } map_copyfiles = {} - input_fields = attr.fields(type(self.inputs)) + input_fields = attr.fields(type(self.spec)) for name, value in orig_inputs.items(): fld = getattr(input_fields, name) copy_mode, copy_collation = parse_copyfile( @@ -484,7 +409,7 @@ def _modify_inputs(self): if value is not copied_value: map_copyfiles[name] = copied_value modified_inputs = template_update( - self.inputs, self.output_dir, map_copyfiles=map_copyfiles + self.spec, self.output_dir, map_copyfiles=map_copyfiles ) assert all(m in orig_inputs for m in modified_inputs), ( "Modified inputs contain fields not present in original inputs. 
" @@ -497,7 +422,7 @@ def _modify_inputs(self): # Ensure we pass a copy not the original just in case inner # attributes are modified during execution value = deepcopy(orig_value) - setattr(self.inputs, name, value) + setattr(self.spec, name, value) return orig_inputs def _populate_filesystem(self, checksum, output_dir): @@ -517,10 +442,7 @@ def _populate_filesystem(self, checksum, output_dir): shutil.rmtree(output_dir) output_dir.mkdir(parents=False, exist_ok=self.can_resume) - def _run(self, rerun=False, environment=None, **kwargs): - self.inputs = attr.evolve(self.inputs, **kwargs) - self.inputs.check_fields_input_spec() - + def _run(self, rerun=False, environment=None): checksum = self.checksum output_dir = self.output_dir lockfile = self.cache_dir / (checksum + ".lock") @@ -535,7 +457,6 @@ def _run(self, rerun=False, environment=None, **kwargs): cwd = os.getcwd() self._populate_filesystem(checksum, output_dir) os.chdir(output_dir) - orig_inputs = self._modify_inputs() result = Result(output=None, runtime=None, errored=False) self.hooks.pre_run_task(self) self.audit.start_audit(odir=output_dir) @@ -544,7 +465,7 @@ def _run(self, rerun=False, environment=None, **kwargs): try: self.audit.monitor() self._run_task(environment=environment) - result.output = self._collect_outputs(output_dir=output_dir) + result.output = self.spec.Outputs.from_task(self) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) @@ -558,8 +479,6 @@ def _run(self, rerun=False, environment=None, **kwargs): # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() # Restore original values to inputs - for field_name, field_value in orig_inputs.items(): - setattr(self.inputs, field_name, field_value) os.chdir(cwd) self.hooks.post_run(self, result) # Check for any changes to the input hashes that have occurred during the execution @@ -567,16 +486,6 @@ def _run(self, rerun=False, environment=None, **kwargs): self._check_for_hash_changes() return result - def _collect_outputs(self, output_dir): - output_klass = self.interface.Outputs - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - other_output = output.collect_additional_outputs( - self.inputs, output_dir, self.output_ - ) - return attr.evolve(output, **self.output_, **other_output) - def _extract_input_el(self, inputs, inp_nm, ind): """ Extracting element of the inputs taking into account @@ -603,7 +512,7 @@ def get_input_el(self, ind): for inp in set(self.input_names): if f"{self.name}.{inp}" in input_ind: inputs_dict[inp] = self._extract_input_el( - inputs=self.inputs, + inputs=self.spec, inp_nm=inp, ind=input_ind[f"{self.name}.{inp}"], ) @@ -626,7 +535,7 @@ def pickle_task(self): def done(self): """Check whether the tasks has been finalized and all outputs are stored.""" # if any of the field is lazy, there is no need to check results - if has_lazy(self.inputs): + if has_lazy(self.spec): return False _result = self.result() if self.state: @@ -699,73 +608,40 @@ def result(self, state_index=None, return_inputs=False): # return a future if not if self.errored: return Result(output=None, runtime=None, errored=True) - if self.state: - if state_index is None: - # if state_index=None, collecting all results - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs) - else: - results = [] - for ind in range(len(self.state.inputs_ind)): - checksum = self.checksum_states(state_index=ind) - result = load_result(checksum, 
self.cache_locations) - if result is None: - return None - results.append(result) - if return_inputs is True or return_inputs == "val": - return list(zip(self.state.states_val, results)) - elif return_inputs == "ind": - return list(zip(self.state.states_ind, results)) - else: - return results - else: # state_index is not None - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs)[ - state_index - ] - result = load_result( - self.checksum_states(state_index), self.cache_locations - ) - if return_inputs is True or return_inputs == "val": - return (self.state.states_val[state_index], result) - elif return_inputs == "ind": - return (self.state.states_ind[state_index], result) - else: - return result + + if state_index is not None: + raise ValueError("Task does not have a state") + checksum = self.checksum + result = load_result(checksum, self.cache_locations) + if result and result.errored: + self._errored = True + if return_inputs is True or return_inputs == "val": + inputs_val = { + f"{self.name}.{inp}": getattr(self.spec, inp) + for inp in self.input_names + } + return (inputs_val, result) + elif return_inputs == "ind": + inputs_ind = {f"{self.name}.{inp}": None for inp in self.input_names} + return (inputs_ind, result) else: - if state_index is not None: - raise ValueError("Task does not have a state") - checksum = self.checksum - result = load_result(checksum, self.cache_locations) - if result and result.errored: - self._errored = True - if return_inputs is True or return_inputs == "val": - inputs_val = { - f"{self.name}.{inp}": getattr(self.inputs, inp) - for inp in self.input_names - } - return (inputs_val, result) - elif return_inputs == "ind": - inputs_ind = {f"{self.name}.{inp}": None for inp in self.input_names} - return (inputs_ind, result) - else: - return result + return result def _reset(self): """Reset the connections between inputs and LazyFields.""" - for field in attrs_fields(self.inputs): + for field in attrs_fields(self.spec): if field.name in self.inp_lf: - setattr(self.inputs, field.name, self.inp_lf[field.name]) + setattr(self.spec, field.name, self.inp_lf[field.name]) if is_workflow(self): for task in self.graph.nodes: task._reset() def _check_for_hash_changes(self): - hash_changes = self.inputs.hash_changes() + hash_changes = self.spec._hash_changes() details = "" for changed in hash_changes: - field = getattr(attr.fields(type(self.inputs)), changed) - val = getattr(self.inputs, changed) + field = getattr(attr.fields(type(self.spec)), changed) + val = getattr(self.spec, changed) field_type = type(val) if issubclass(field.type, FileSet): details += ( @@ -797,8 +673,8 @@ def _check_for_hash_changes(self): "Input values and hashes for '%s' %s node:\n%s\n%s", self.name, type(self).__name__, - self.inputs, - self.inputs._hashes, + self.spec, + self.spec._hashes, ) SUPPORTED_COPY_MODES = FileSet.CopyMode.any @@ -906,12 +782,12 @@ def checksum(self): (before the tasks have inputs etc.) 
""" # if checksum is called before run the _graph_checksums is not ready - if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = { + if is_workflow(self) and self.spec._graph_checksums is attr.NOTHING: + self.spec._graph_checksums = { nd.name: nd.checksum for nd in self.graph_sorted } - input_hash = self.inputs.hash + input_hash = self.spec.hash if not self.state: self._checksum = create_checksum( self.__class__.__name__, self._checksum_wf(input_hash) @@ -1190,7 +1066,7 @@ async def _run_task(self, submitter, rerun=False, environment=None): # logger.info("Added %s to %s", self.output_spec, self) def _collect_outputs(self): - output_klass = self.interface.Outputs + output_klass = self.spec.Outputs output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 0c57008058..80193c87db 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -1,7 +1,10 @@ +import typing as ty from .helpers import execute - from pathlib import Path +if ty.TYPE_CHECKING: + from pydra.engine.task import ShellTask + class Environment: """ @@ -14,7 +17,7 @@ class Environment: def setup(self): pass - def execute(self, task): + def execute(self, task: "ShellTask"): """ Execute the task in the environment. @@ -39,7 +42,7 @@ class Native(Environment): Native environment, i.e. the tasks are executed in the current python environment. """ - def execute(self, task): + def execute(self, task: "ShellTask"): keys = ["return_code", "stdout", "stderr"] values = execute(task.command_args(), strip=task.strip) output = dict(zip(keys, values)) @@ -87,7 +90,7 @@ def bind(self, loc, mode="ro"): class Docker(Container): """Docker environment.""" - def execute(self, task): + def execute(self, task: "ShellTask"): docker_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.get_bindings(root=self.root) @@ -123,7 +126,7 @@ def execute(self, task): class Singularity(Container): """Singularity environment.""" - def execute(self, task): + def execute(self, task: "ShellTask"): singularity_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.get_bindings(root=self.root) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index a643f0fef2..b026c342cc 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -3,8 +3,11 @@ import os from pathlib import Path import re +from copy import copy import inspect import itertools +import platform +import shlex import typing as ty from glob import glob from copy import deepcopy @@ -13,22 +16,56 @@ from fileformats.generic import File from pydra.engine.audit import AuditFlag from pydra.utils.typing import TypeParser, MultiOutputObj -from .helpers import attrs_fields, attrs_values, is_lazy, list_fields -from .helpers_file import template_update_single +from .helpers import ( + attrs_fields, + attrs_values, + is_lazy, + list_fields, + position_sort, + ensure_list, + parse_format_string, +) +from .helpers_file import template_update_single, template_update from pydra.utils.hash import hash_function, Cache -from pydra.design.base import Field, Arg, Out, RequirementSet +from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY from pydra.design import shell +if ty.TYPE_CHECKING: + from pydra.engine.core import Task + from pydra.engine.task import ShellTask + def is_set(value: ty.Any) -> bool: """Check if a value has been set.""" - return value is not attrs.NOTHING + 
return value not in (attrs.NOTHING, EMPTY) class Outputs: """Base class for all output specifications""" - RESERVED_FIELD_NAMES = ("split", "combine") + RESERVED_FIELD_NAMES = ("inputs", "split", "combine") + + @classmethod + def from_task(cls, task: "Task") -> Self: + """Collect the outputs of a task from a combination of the provided inputs, + the objects in the output directory, and the stdout and stderr of the process. + + Parameters + ---------- + task : Task + The task whose outputs are being collected. + + Returns + ------- + outputs : Outputs + The outputs of the task + """ + return cls(**{f.name: attrs.NOTHING for f in attrs_fields(cls)}) + + @property + def inputs(self): + """The inputs object associated with a lazy-outputs object""" + return self._get_node().inputs def split( self, @@ -62,7 +99,9 @@ def split( self : TaskBase a reference to the task """ - self._node.split(splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs) + self._get_node().split( + splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs + ) return self def combine( @@ -89,9 +128,17 @@ def combine( self : Self a reference to the outputs object """ - self._node.combine(combiner, overwrite=overwrite) + self._get_node().combine(combiner, overwrite=overwrite) return self + def _get_node(self): + try: + return self._node + except AttributeError: + raise AttributeError( + f"{self} outputs object is not a lazy output of a workflow node" + ) + OutputsType = ty.TypeVar("OutputType", bound=Outputs) @@ -101,6 +148,8 @@ class TaskSpec(ty.Generic[OutputsType]): Task: "ty.Type[core.Task]" + RESERVED_FIELD_NAMES = () + def __call__( self, name: str | None = None, @@ -116,7 +165,7 @@ def __call__( ): self._check_rules() task = self.Task( - self, + spec=self, name=name, audit_flags=audit_flags, cache_dir=cache_dir, @@ -175,6 +224,9 @@ def _check_rules(self): for field in list_fields(self): value = getattr(self, field.name) + if is_lazy(value): + continue + # Collect alternative fields associated with this field. if field.xor: alternative_fields = { @@ -182,9 +234,7 @@ def _check_rules(self): for name in field.xor if name != field.name } - set_alternatives = { - n: v for n, v in alternative_fields.items() if is_set(v) - } + set_alternatives = {n: v for n, v in alternative_fields.items() if v} # Raise error if no field in mandatory alternative group is set. if not is_set(value): @@ -206,10 +256,19 @@ def _check_rules(self): ) # Raise error if any required field is unset. 
- if field.requires and not any(rs.satisfied(self) for rs in field.requires): + if ( + value + and field.requires + and not any(rs.satisfied(self) for rs in field.requires) + ): + if len(field.requires) > 1: + qualification = ( + " at least one of the following requirements to be satisfied: " + ) + else: + qualification = "" raise ValueError( - f"{field.name} requires at least one of the requirement sets to be " - f"satisfied: {[str(r) for r in field.requires]}" + f"{field.name!r} requires{qualification} {[str(r) for r in field.requires]}" ) @classmethod @@ -235,6 +294,14 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: f"of {inpt} " + str(list(unrecognised)) ) + def _check_resolved(self): + """Checks that all the fields in the spec have been resolved""" + if has_lazy_values := [n for n, v in attrs_values(self).items() if is_lazy(v)]: + raise ValueError( + f"Cannot execute {self} because the following fields " + f"still have lazy values {has_lazy_values}" + ) + @attrs.define(kw_only=True) class Runtime: @@ -257,7 +324,7 @@ class Result: errored: bool = False def __getstate__(self): - state = self.__dict__.copy() + state = attrs_values(self) if state["output"] is not None: fields = tuple((el.name, el.type) for el in attrs_fields(state["output"])) state["output_spec"] = (state["output"].__class__.__name__, fields) @@ -348,13 +415,9 @@ class ShellOutputs(Outputs): stderr: str = shell.out(help_string=STDERR_HELP) @classmethod - def collect_outputs( + def from_task( cls, - inputs: "ShellSpec", - output_dir: Path, - stdout: str, - stderr: str, - return_code: int, + task: "ShellTask", ) -> Self: """Collect the outputs of a shell process from a combination of the provided inputs, the objects in the output directory, and the stdout and stderr of the process. 
@@ -378,9 +441,15 @@ def collect_outputs( The outputs of the shell process """ - outputs = cls(return_code=return_code, stdout=stdout, stderr=stderr) + outputs = cls( + return_code=task.output_["return_code"], + stdout=task.output_["stdout"], + stderr=task.output_["stderr"], + ) fld: shell.out for fld in list_fields(cls): + if fld.name in ["return_code", "stdout", "stderr"]: + continue if not TypeParser.is_subclass( fld.type, ( @@ -399,17 +468,17 @@ def collect_outputs( ) # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - if isinstance(fld, shell.outarg) and is_set(getattr(inputs, fld.name)): - resolved_value = getattr(inputs, fld.name) + if isinstance(fld, shell.outarg) and is_set(getattr(task.inputs, fld.name)): + resolved_value = getattr(task.spec, fld.name) elif is_set(fld.default): - resolved_value = cls._resolve_default_value(fld, output_dir) + resolved_value = cls._resolve_default_value(fld, task.output_dir) else: if fld.type in [int, float, bool, str, list] and not fld.callable: raise AttributeError( f"{fld.type} has to have a callable in metadata" ) resolved_value = cls._generate_implicit_value( - fld, inputs, output_dir, outputs, stdout, stderr + fld, task.spec, task.output_dir, outputs.stdout, outputs.stderr ) # Set the resolved value setattr(outputs, fld.name, resolved_value) @@ -543,10 +612,204 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool class ShellSpec(TaskSpec[ShellOutputsType]): - pass + + RESERVED_FIELD_NAMES = ("cmdline",) + + @property + def cmdline(self) -> str: + """The equivalent command line that would be submitted if the task were run on + the current working directory.""" + # checking the inputs fields before returning the command line + self._check_resolved() + # Skip the executable, which can be a multi-part command, e.g. 'docker run'. + cmd_args = self._command_args() + cmdline = cmd_args[0] + for arg in cmd_args[1:]: + # If there are spaces in the arg, and it is not enclosed by matching + # quotes, add quotes to escape the space. 
Not sure if this should + # be expanded to include other special characters apart from spaces + if " " in arg: + cmdline += " '" + arg + "'" + else: + cmdline += " " + arg + return cmdline + + def _command_args( + self, + output_dir: Path | None = None, + input_updates: dict[str, ty.Any] | None = None, + root: Path | None = None, + ) -> list[str]: + """Get command line arguments""" + if output_dir is None: + output_dir = Path.cwd() + self._check_resolved() + inputs = attrs_values(self) + modified_inputs = template_update(self, output_dir=output_dir) + if input_updates: + inputs.update(input_updates) + inputs.update(modified_inputs) + pos_args = [] # list for (position, command arg) + self._positions_provided = [] + for field in list_fields(self): + name = field.name + value = inputs[name] + if value is None: + continue + if name == "executable": + pos_args.append(self._command_shelltask_executable(field, value)) + elif name == "args": + pos_val = self._command_shelltask_args(field, value) + if pos_val: + pos_args.append(pos_val) + else: + if name in modified_inputs: + pos_val = self._command_pos_args( + field, value, output_dir, root=root + ) + else: + pos_val = self._command_pos_args(field, value, output_dir, inputs) + if pos_val: + pos_args.append(pos_val) + + # Sort command and arguments by position + cmd_args = position_sort(pos_args) + # pos_args values are each a list of arguments, so concatenate lists after sorting + return sum(cmd_args, []) + + def _command_shelltask_executable( + self, field: shell.arg, value: ty.Any + ) -> tuple[int, ty.Any]: + """Returning position and value for executable ShellTask input""" + pos = 0 # executable should be the first el. of the command + assert value + return pos, ensure_list(value, tuple2list=True) + + def _command_shelltask_args( + self, field: shell.arg, value: ty.Any + ) -> tuple[int, ty.Any]: + """Returning position and value for args ShellTask input""" + pos = -1 # assuming that args is the last el. of the command + if value is None: + return None + else: + return pos, ensure_list(value, tuple2list=True) + + def _command_pos_args( + self, + field: shell.arg, + value: ty.Any, + inputs: dict[str, ty.Any], + output_dir: Path, + root: Path | None = None, + ) -> tuple[int, ty.Any]: + """ + Checking all additional input fields, setting pos to None, if position not set. + Creating a list with additional parts of the command that comes from + the specific field. + """ + if field.argstr is None and field.formatter is None: + # assuming that input that has no argstr is not used in the command, + # or a formatter is not provided too. 
+ return None + if field.position is not None: + if not isinstance(field.position, int): + raise Exception( + f"position should be an integer, but {field.position} given" + ) + # checking if the position is not already used + if field.position in self._positions_provided: + raise Exception( + f"{field.name} can't have provided position, {field.position} is already used" + ) + + self._positions_provided.append(field.position) + + # Shift non-negatives up to allow executable to be 0 + # Shift negatives down to allow args to be -1 + field.position += 1 if field.position >= 0 else -1 + + if value: + if root: # values from templates + value = value.replace(str(output_dir), f"{root}{output_dir}") + + if field.readonly and value is not None: + raise Exception(f"{field.name} is read only, the value can't be provided") + elif value is None and not field.readonly and field.formatter is None: + return None + + cmd_add = [] + # formatter that creates a custom command argument + # it can take the value of the field, all inputs, or the value of other fields. + if field.formatter: + call_args = inspect.getfullargspec(field.formatter) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = value + elif argnm == "inputs": + call_args_val[argnm] = inputs + else: + if argnm in inputs: + call_args_val[argnm] = inputs[argnm] + else: + raise AttributeError( + f"arguments of the formatter function from {field.name} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm} is used" + ) + cmd_el_str = field.formatter(**call_args_val) + cmd_el_str = cmd_el_str.strip().replace(" ", " ") + if cmd_el_str != "": + cmd_add += split_cmd(cmd_el_str) + elif field.type is bool and "{" not in field.argstr: + # if value is simply True the original argstr is used, + # if False, nothing is added to the command. + if value is True: + cmd_add.append(field.argstr) + else: + if ( + field.argstr.endswith("...") + and isinstance(value, ty.Iterable) + and not isinstance(value, (str, bytes)) + ): + field.argstr = field.argstr.replace("...", "") + # if argstr has a more complex form, with "{input_field}" + if "{" in field.argstr and "}" in field.argstr: + argstr_formatted_l = [] + for val in value: + argstr_f = argstr_formatting( + field.argstr, self, value_updates={field.name: val} + ) + argstr_formatted_l.append(f" {argstr_f}") + cmd_el_str = field.sep.join(argstr_formatted_l) + else: # argstr has a simple form, e.g. "-f", or "--f" + cmd_el_str = field.sep.join( + [f" {field.argstr} {val}" for val in value] + ) + else: + # in case there are ... when input is not a list + field.argstr = field.argstr.replace("...", "") + if isinstance(value, ty.Iterable) and not isinstance( + value, (str, bytes) + ): + cmd_el_str = field.sep.join([str(val) for val in value]) + value = cmd_el_str + # if argstr has a more complex form, with "{input_field}" + if "{" in field.argstr and "}" in field.argstr: + cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) + cmd_el_str = argstr_formatting(cmd_el_str, self.spec) + else: # argstr has a simple form, e.g. 
"-f", or "--f" + if value: + cmd_el_str = f"{field.argstr} {value}" + else: + cmd_el_str = "" + if cmd_el_str: + cmd_add += split_cmd(cmd_el_str) + return field.position, cmd_add -def donothing(*args, **kwargs): +def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: return None @@ -569,4 +832,69 @@ def reset(self): setattr(self, val, donothing) +def split_cmd(cmd: str): + """Splits a shell command line into separate arguments respecting quotes + + Parameters + ---------- + cmd : str + Command line string or part thereof + + Returns + ------- + str + the command line string split into process args + """ + # Check whether running on posix or Windows system + on_posix = platform.system() != "Windows" + args = shlex.split(cmd, posix=on_posix) + cmd_args = [] + for arg in args: + match = re.match("(['\"])(.*)\\1$", arg) + if match: + cmd_args.append(match.group(2)) + else: + cmd_args.append(arg) + return cmd_args + + +def argstr_formatting( + argstr: str, inputs: dict[str, ty.Any], value_updates: dict[str, ty.Any] = None +): + """formatting argstr that have form {field_name}, + using values from inputs and updating with value_update if provided + """ + # if there is a value that has to be updated (e.g. single value from a list) + # getting all fields that should be formatted, i.e. {field_name}, ... + if value_updates: + inputs = copy(inputs) + inputs.update(value_updates) + inp_fields = parse_format_string(argstr) + val_dict = {} + for fld_name in inp_fields: + fld_value = inputs[fld_name] + fld_attr = getattr(attrs.fields(type(inputs)), fld_name) + if fld_value is None or ( + fld_value is False + and fld_attr.type is not bool + and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) + ): + # if value is NOTHING, nothing should be added to the command + val_dict[fld_name] = "" + else: + val_dict[fld_name] = fld_value + + # formatting string based on the val_dict + argstr_formatted = argstr.format(**val_dict) + # removing extra commas and spaces after removing the field that have NOTHING + argstr_formatted = ( + argstr_formatted.replace("[ ", "[") + .replace(" ]", "]") + .replace("[,", "[") + .replace(",]", "]") + .strip() + ) + return argstr_formatted + + from pydra.engine import core # noqa: E402 diff --git a/pydra/engine/task.py b/pydra/engine/task.py index 78ab415d38..ab226e8ba4 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -41,31 +41,21 @@ from __future__ import annotations -import platform -import re import attr -import attrs -import inspect -import typing as ty -import shlex from pathlib import Path import cloudpickle as cp from fileformats.core import FileSet from .core import Task from pydra.utils.messenger import AuditFlag from .specs import ( + PythonSpec, ShellSpec, attrs_fields, ) from .helpers import ( attrs_values, - is_lazy, - parse_format_string, - position_sort, - ensure_list, parse_copyfile, ) -from .helpers_file import template_update from pydra.utils.typing import TypeParser from .environments import Native @@ -73,12 +63,14 @@ class PythonTask(Task): """Wrap a Python callable as a task element.""" + spec: PythonSpec + def _run_task(self, environment=None): - inputs = attrs_values(self.inputs) + inputs = attrs_values(self.spec) del inputs["_func"] self.output_ = None - output = cp.loads(self.inputs._func)(**inputs) - output_names = [f.name for f in attr.fields(self.interface.Outputs)] + output = cp.loads(self.spec._func)(**inputs) + output_names = [f.name for f in attr.fields(self.spec.Outputs)] if output is None: self.output_ = {nm: None for nm 
in output_names} elif len(output_names) == 1: @@ -98,6 +90,8 @@ def _run_task(self, environment=None): class ShellTask(Task): """Wrap a shell command as a task element.""" + spec: ShellSpec + def __init__( self, spec: ShellSpec, @@ -137,7 +131,11 @@ def __init__( strip : :obj:`bool` TODO """ + self.return_code = None + self.stdout = None + self.stderr = None super().__init__( + spec=spec, name=name, inputs=kwargs, cont_dim=cont_dim, @@ -174,212 +172,8 @@ def get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: self._prepare_bindings(root=root) return self.bindings - def command_args(self, root=None): - """Get command line arguments""" - if is_lazy(self.inputs): - raise Exception("can't return cmdline, self.inputs has LazyFields") - if self.state: - raise NotImplementedError - - modified_inputs = template_update(self.inputs, output_dir=self.output_dir) - for field_name, field_value in modified_inputs.items(): - setattr(self.inputs, field_name, field_value) - - pos_args = [] # list for (position, command arg) - self._positions_provided = [] - for field in attrs_fields(self.inputs): - name, meta = field.name, field.metadata - if ( - getattr(self.inputs, name) is attr.NOTHING - and not meta.get("readonly") - and not meta.get("formatter") - ): - continue - if name == "executable": - pos_args.append(self._command_shelltask_executable(field)) - elif name == "args": - pos_val = self._command_shelltask_args(field) - if pos_val: - pos_args.append(pos_val) - else: - if name in modified_inputs: - pos_val = self._command_pos_args(field, root=root) - else: - pos_val = self._command_pos_args(field) - if pos_val: - pos_args.append(pos_val) - - # Sort command and arguments by position - cmd_args = position_sort(pos_args) - # pos_args values are each a list of arguments, so concatenate lists after sorting - return sum(cmd_args, []) - - def _field_value(self, field, check_file=False): - """ - Checking value of the specific field, if value is not set, None is returned. - check_file has no effect, but subclasses can use it to validate or modify - filenames. - """ - value = getattr(self.inputs, field.name) - if value == attr.NOTHING: - value = None - return value - - def _command_shelltask_executable(self, field): - """Returning position and value for executable ShellTask input""" - pos = 0 # executable should be the first el. of the command - value = self._field_value(field) - if value is None: - raise ValueError("executable has to be set") - return pos, ensure_list(value, tuple2list=True) - - def _command_shelltask_args(self, field): - """Returning position and value for args ShellTask input""" - pos = -1 # assuming that args is the last el. of the command - value = self._field_value(field, check_file=True) - if value is None: - return None - else: - return pos, ensure_list(value, tuple2list=True) - - def _command_pos_args(self, field, root=None): - """ - Checking all additional input fields, setting pos to None, if position not set. - Creating a list with additional parts of the command that comes from - the specific field. - """ - argstr = field.metadata.get("argstr", None) - formatter = field.metadata.get("formatter", None) - if argstr is None and formatter is None: - # assuming that input that has no argstr is not used in the command, - # or a formatter is not provided too. 
- return None - pos = field.metadata.get("position", None) - if pos is not None: - if not isinstance(pos, int): - raise Exception(f"position should be an integer, but {pos} given") - # checking if the position is not already used - if pos in self._positions_provided: - raise Exception( - f"{field.name} can't have provided position, {pos} is already used" - ) - - self._positions_provided.append(pos) - - # Shift non-negatives up to allow executable to be 0 - # Shift negatives down to allow args to be -1 - pos += 1 if pos >= 0 else -1 - - value = self._field_value(field, check_file=True) - - if value: - if field.name in self.inputs_mod_root: - value = self.inputs_mod_root[field.name] - elif root: # values from templates - value = value.replace(str(self.output_dir), f"{root}{self.output_dir}") - - if field.metadata.get("readonly", False) and value is not None: - raise Exception(f"{field.name} is read only, the value can't be provided") - elif ( - value is None - and not field.metadata.get("readonly", False) - and formatter is None - ): - return None - - inputs_dict = attrs_values(self.inputs) - - cmd_add = [] - # formatter that creates a custom command argument - # it can take the value of the field, all inputs, or the value of other fields. - if "formatter" in field.metadata: - call_args = inspect.getfullargspec(field.metadata["formatter"]) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = value - elif argnm == "inputs": - call_args_val[argnm] = inputs_dict - else: - if argnm in inputs_dict: - call_args_val[argnm] = inputs_dict[argnm] - else: - raise AttributeError( - f"arguments of the formatter function from {field.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - cmd_el_str = field.metadata["formatter"](**call_args_val) - cmd_el_str = cmd_el_str.strip().replace(" ", " ") - if cmd_el_str != "": - cmd_add += split_cmd(cmd_el_str) - elif field.type is bool and "{" not in argstr: - # if value is simply True the original argstr is used, - # if False, nothing is added to the command. - if value is True: - cmd_add.append(argstr) - else: - sep = field.metadata.get("sep", " ") - if ( - argstr.endswith("...") - and isinstance(value, ty.Iterable) - and not isinstance(value, (str, bytes)) - ): - argstr = argstr.replace("...", "") - # if argstr has a more complex form, with "{input_field}" - if "{" in argstr and "}" in argstr: - argstr_formatted_l = [] - for val in value: - argstr_f = argstr_formatting( - argstr, self.inputs, value_updates={field.name: val} - ) - argstr_formatted_l.append(f" {argstr_f}") - cmd_el_str = sep.join(argstr_formatted_l) - else: # argstr has a simple form, e.g. "-f", or "--f" - cmd_el_str = sep.join([f" {argstr} {val}" for val in value]) - else: - # in case there are ... when input is not a list - argstr = argstr.replace("...", "") - if isinstance(value, ty.Iterable) and not isinstance( - value, (str, bytes) - ): - cmd_el_str = sep.join([str(val) for val in value]) - value = cmd_el_str - # if argstr has a more complex form, with "{input_field}" - if "{" in argstr and "}" in argstr: - cmd_el_str = argstr.replace(f"{{{field.name}}}", str(value)) - cmd_el_str = argstr_formatting(cmd_el_str, self.inputs) - else: # argstr has a simple form, e.g. 
"-f", or "--f" - if value: - cmd_el_str = f"{argstr} {value}" - else: - cmd_el_str = "" - if cmd_el_str: - cmd_add += split_cmd(cmd_el_str) - return pos, cmd_add - - @property - def cmdline(self): - """Get the actual command line that will be submitted - Returns a list if the task has a state. - """ - if is_lazy(self.inputs): - raise Exception("can't return cmdline, self.inputs has LazyFields") - # checking the inputs fields before returning the command line - self.inputs.check_fields_input_spec() - if self.state: - raise NotImplementedError - # Skip the executable, which can be a multi-part command, e.g. 'docker run'. - cmdline = self.command_args()[0] - for arg in self.command_args()[1:]: - # If there are spaces in the arg, and it is not enclosed by matching - # quotes, add quotes to escape the space. Not sure if this should - # be expanded to include other special characters apart from spaces - if " " in arg: - cmdline += " '" + arg + "'" - else: - cmdline += " " + arg - return cmdline + def command_args(self, root: Path | None = None) -> list[str]: + return self.spec._command_args(input_updates=self.inputs_mod_root, root=root) def _run_task(self, environment=None): if environment is None: @@ -392,9 +186,9 @@ def _prepare_bindings(self, root: str): This updates the ``bindings`` attribute of the current task to make files available in an ``Environment``-defined ``root``. """ - for fld in attrs_fields(self.inputs): + for fld in attrs_fields(self.spec): if TypeParser.contains_type(FileSet, fld.type): - fileset = getattr(self.inputs, fld.name) + fileset = getattr(self.spec, fld.name) copy = parse_copyfile(fld)[0] == FileSet.CopyMode.copy host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") @@ -409,66 +203,3 @@ def _prepare_bindings(self, root: str): ) DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent - - -def split_cmd(cmd: str): - """Splits a shell command line into separate arguments respecting quotes - - Parameters - ---------- - cmd : str - Command line string or part thereof - - Returns - ------- - str - the command line string split into process args - """ - # Check whether running on posix or Windows system - on_posix = platform.system() != "Windows" - args = shlex.split(cmd, posix=on_posix) - cmd_args = [] - for arg in args: - match = re.match("(['\"])(.*)\\1$", arg) - if match: - cmd_args.append(match.group(2)) - else: - cmd_args.append(arg) - return cmd_args - - -def argstr_formatting(argstr, inputs, value_updates=None): - """formatting argstr that have form {field_name}, - using values from inputs and updating with value_update if provided - """ - inputs_dict = attrs_values(inputs) - # if there is a value that has to be updated (e.g. single value from a list) - if value_updates: - inputs_dict.update(value_updates) - # getting all fields that should be formatted, i.e. {field_name}, ... 
- inp_fields = parse_format_string(argstr) - val_dict = {} - for fld_name in inp_fields: - fld_value = inputs_dict[fld_name] - fld_attr = getattr(attrs.fields(type(inputs)), fld_name) - if fld_value is attr.NOTHING or ( - fld_value is False - and fld_attr.type is not bool - and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) - ): - # if value is NOTHING, nothing should be added to the command - val_dict[fld_name] = "" - else: - val_dict[fld_name] = fld_value - - # formatting string based on the val_dict - argstr_formatted = argstr.format(**val_dict) - # removing extra commas and spaces after removing the field that have NOTHING - argstr_formatted = ( - argstr_formatted.replace("[ ", "[") - .replace(" ]", "]") - .replace("[,", "[") - .replace(",]", "]") - .strip() - ) - return argstr_formatted diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index 28da1f176a..c951091887 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -29,8 +29,8 @@ def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir): """simple task to run fsl.bet using BoshTask""" btask = BoshTask(name="NA", zenodo_id="1482743") - btask.inputs.infile = data_tests_dir / "test.nii.gz" - btask.inputs.maskfile = maskfile + btask.spec.infile = data_tests_dir / "test.nii.gz" + btask.spec.maskfile = maskfile btask.cache_dir = tmpdir res = results_function(btask, plugin) @@ -60,8 +60,8 @@ def test_boutiques_spec_1(data_tests_dir): assert len(btask.input_spec.fields) == 2 assert btask.input_spec.fields[0][0] == "infile" assert btask.input_spec.fields[1][0] == "maskfile" - assert hasattr(btask.inputs, "infile") - assert hasattr(btask.inputs, "maskfile") + assert hasattr(btask.spec, "infile") + assert hasattr(btask.spec, "maskfile") assert len(btask.output_spec.fields) == 2 assert btask.output_spec.fields[0][0] == "outfile" @@ -84,9 +84,9 @@ def test_boutiques_spec_2(data_tests_dir): assert len(btask.input_spec.fields) == 1 assert btask.input_spec.fields[0][0] == "infile" - assert hasattr(btask.inputs, "infile") + assert hasattr(btask.spec, "infile") # input doesn't see maskfile - assert not hasattr(btask.inputs, "maskfile") + assert not hasattr(btask.spec, "maskfile") assert len(btask.output_spec.fields) == 0 diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 5f69584d60..cc196cd87c 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -77,7 +77,7 @@ def test_docker_2a(results_function, plugin): args=cmd_args, environment=Docker(image="busybox"), ) - assert docky.inputs.executable == "echo" + assert docky.spec.executable == "echo" assert docky.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" res = results_function(docky, plugin) @@ -332,7 +332,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): input_spec=my_input_spec, strip=True, ) - assert docky.inputs.file2.fspath == filename_2 + assert docky.spec.file2.fspath == filename_2 res = docky() assert res.output.stdout == "hello from pydra\nhave a nice one" diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 915d183973..7db3f8d34f 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -394,11 +394,11 @@ class MyCommand(ShellTask): task = MyCommand(in_file=filename) assert task.cmdline == f"my {filename}" - task.inputs.optional = True + task.spec.optional = True assert task.cmdline == f"my 
{filename} --opt {task.output_dir / 'file.out'}" - task.inputs.optional = False + task.spec.optional = False assert task.cmdline == f"my {filename}" - task.inputs.optional = "custom-file-out.txt" + task.spec.optional = "custom-file-out.txt" assert task.cmdline == f"my {filename} --opt custom-file-out.txt" diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index 4dc6f80369..2f5abbfb76 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -92,7 +92,7 @@ def test_interface_executable_1(): """testing if the class executable is properly set and used in the command line""" task = Interf_2() assert task.executable == "testing command" - assert task.inputs.executable == "testing command" + assert task.spec.executable == "testing command" assert task.cmdline == "testing command" @@ -103,14 +103,14 @@ def test_interface_executable_2(): task = Interf_2(executable="i want a different command") assert task.executable == "testing command" # task.executable stays the same, but input.executable is changed, so the cmd is changed - assert task.inputs.executable == "i want a different command" + assert task.spec.executable == "i want a different command" assert task.cmdline == "i want a different command" def test_interface_cmdline_with_spaces(): task = Interf_3(in_file="/path/to/file/with spaces") assert task.executable == "testing command" - assert task.inputs.executable == "testing command" + assert task.spec.executable == "testing command" assert task.cmdline == "testing command '/path/to/file/with spaces'" diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 631f72ff73..b8591092f4 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -78,7 +78,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): # separate command into exec + args shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path - assert shelly.inputs.executable == "echo" + assert shelly.spec.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) res = results_function(shelly, plugin) @@ -95,7 +95,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): # separate command into exec + args shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path - assert shelly.inputs.executable == "echo" + assert shelly.spec.executable == "echo" assert shelly.cmdline == "echo pydra" res = results_function(shelly, plugin) @@ -307,8 +307,8 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec - assert shelly.inputs.args == cmd_args + assert shelly.spec.executable == cmd_exec + assert shelly.spec.args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" res = results_function(shelly, plugin) @@ -356,8 +356,8 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec - assert shelly.inputs.args == cmd_args + assert shelly.spec.executable == cmd_exec + assert shelly.spec.args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO from pydra" @@ -395,7 +395,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): input_spec=my_input_spec, 
cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ -428,7 +428,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ -462,9 +462,9 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - shelly.inputs.text = hello + shelly.spec.text = hello - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ -530,7 +530,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo" res = results_function(shelly, plugin) assert res.output.stdout == "\n" @@ -560,7 +560,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo Hello" res = results_function(shelly, plugin) @@ -586,7 +586,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo Hello" res = results_function(shelly, plugin) @@ -617,7 +617,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "echo Hi" res = results_function(shelly, plugin) @@ -728,7 +728,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "ls -t" results_function(shelly, plugin) @@ -825,7 +825,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "ls -l -t" results_function(shelly, plugin) @@ -913,8 +913,8 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - shelly.inputs.opt_l = cmd_l - assert shelly.inputs.executable == cmd_exec + shelly.spec.opt_l = cmd_l + assert shelly.spec.executable == cmd_exec assert shelly.cmdline == "ls -l -t" results_function(shelly, plugin) @@ -1505,7 +1505,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): cache_dir=tmp_path, ) - assert shelly.inputs.executable == cmd_exec + assert shelly.spec.executable == cmd_exec 
res = results_function(shelly, plugin) assert res.output.stdout == "hello from boston" @@ -1590,7 +1590,7 @@ def test_shell_cmd_inputspec_11(tmp_path): wf = Workflow(name="wf", input_spec=["inputFiles"], inputFiles=["test1", "test2"]) - task.inputs.inputFiles = wf.lzin.inputFiles + task.spec.inputFiles = wf.lzin.inputFiles wf.add(task) wf.set_output([("out", wf.echoMultiple.lzout.outputFiles)]) @@ -1704,8 +1704,8 @@ def test_shell_cmd_inputspec_with_iterable(): task = ShellTask(name="test", input_spec=input_spec, executable="test") for iterable_type in (list, tuple): - task.inputs.iterable_1 = iterable_type(range(3)) - task.inputs.iterable_2 = iterable_type(["bar", "foo"]) + task.spec.iterable_1 = iterable_type(range(3)) + task.spec.iterable_2 = iterable_type(["bar", "foo"]) assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" @@ -3445,8 +3445,8 @@ def test_shell_cmd_inputspec_outputspec_1(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.file2 = "new_file_2.txt" res = shelly() assert res.output.stdout == "" @@ -3499,7 +3499,7 @@ def test_shell_cmd_inputspec_outputspec_1a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.spec.file1 = "new_file_1.txt" res = shelly() assert res.output.stdout == "" @@ -3560,8 +3560,8 @@ def test_shell_cmd_inputspec_outputspec_2(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.file2 = "new_file_2.txt" # all fields from output_spec should be in output_names and generated_output_names assert ( shelly.output_names @@ -3627,7 +3627,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.spec.file1 = "new_file_1.txt" # generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", @@ -3699,9 +3699,9 @@ def test_shell_cmd_inputspec_outputspec_3(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" - shelly.inputs.additional_inp = 2 + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.file2 = "new_file_2.txt" + shelly.spec.additional_inp = 2 res = shelly() assert res.output.stdout == "" @@ -3760,8 +3760,8 @@ def test_shell_cmd_inputspec_outputspec_3a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.file2 = "new_file_2.txt" + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.file2 = "new_file_2.txt" # generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", @@ -3824,8 +3824,8 @@ def test_shell_cmd_inputspec_outputspec_4(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp = 2 + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.additional_inp = 2 # generated_output_names should be the same as output_names assert ( shelly.output_names @@ -3879,9 +3879,9 @@ def test_shell_cmd_inputspec_outputspec_4a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.spec.file1 = "new_file_1.txt" # the value is not in the list from 
requires - shelly.inputs.additional_inp = 1 + shelly.spec.additional_inp = 1 res = shelly() assert res.output.stdout == "" @@ -3934,8 +3934,8 @@ def test_shell_cmd_inputspec_outputspec_5(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp_A = 2 + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.additional_inp_A = 2 res = shelly() assert res.output.stdout == "" @@ -3988,8 +3988,8 @@ def test_shell_cmd_inputspec_outputspec_5a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" - shelly.inputs.additional_inp_B = 2 + shelly.spec.file1 = "new_file_1.txt" + shelly.spec.additional_inp_B = 2 res = shelly() assert res.output.stdout == "" @@ -4042,7 +4042,7 @@ def test_shell_cmd_inputspec_outputspec_5b(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.spec.file1 = "new_file_1.txt" res = shelly() assert res.output.stdout == "" @@ -4091,7 +4091,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.inputs.file1 = "new_file_1.txt" + shelly.spec.file1 = "new_file_1.txt" with pytest.raises(Exception, match="requires field can be"): shelly() @@ -4338,7 +4338,7 @@ def change_name(file): name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec ) out_file = shelly.output_dir / "test_brain.nii.gz" - assert shelly.inputs.executable == "bet" + assert shelly.spec.executable == "bet" assert shelly.cmdline == f"bet {in_file} {out_file}" # res = shelly(plugin="cf") diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 53071d65c5..7b95ea558f 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -719,7 +719,7 @@ def test_shell_cmd_inputs_not_given_1(): ) shelly = ShellTask(name="shelly", executable="executable", input_spec=my_input_spec) - shelly.inputs.arg2 = "argument2" + shelly.spec.arg2 = "argument2" assert shelly.cmdline == "executable --arg2 argument2" @@ -1747,7 +1747,7 @@ def test_shell_cmd_inputs_template_requires_1(): assert "--tpl" not in shelly.cmdline # When requirements are met. 
- shelly.inputs.with_tpl = True + shelly.spec.with_tpl = True assert "tpl.in.file" in shelly.cmdline @@ -2212,27 +2212,27 @@ class SimpleTaskXor(ShellTask): def test_task_inputs_mandatory_with_xOR_one_mandatory_is_OK(): """input spec with mandatory inputs""" task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = attr.NOTHING - task.inputs.check_fields_input_spec() + task.spec.input_1 = "Input1" + task.spec.input_2 = attr.NOTHING + task.spec.check_fields_input_spec() def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): """input spec with mandatory inputs""" task = SimpleTaskXor() - task.inputs.input_1 = attr.NOTHING - task.inputs.input_2 = attr.NOTHING - task.inputs.input_3 = True - task.inputs.check_fields_input_spec() + task.spec.input_1 = attr.NOTHING + task.spec.input_2 = attr.NOTHING + task.spec.input_3 = True + task.spec.check_fields_input_spec() def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): """input spec with mandatory inputs""" task = SimpleTaskXor() - task.inputs.input_1 = attr.NOTHING - task.inputs.input_2 = attr.NOTHING + task.spec.input_1 = attr.NOTHING + task.spec.input_2 = attr.NOTHING with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() + task.spec.check_fields_input_spec() assert "input_1 is mandatory" in str(excinfo.value) assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) assert excinfo.type is AttributeError @@ -2241,11 +2241,11 @@ def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): """input spec with mandatory inputs""" task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = True + task.spec.input_1 = "Input1" + task.spec.input_2 = True with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() + task.spec.check_fields_input_spec() assert "input_1 is mutually exclusive with ['input_2']" in str(excinfo.value) assert excinfo.type is AttributeError @@ -2253,12 +2253,12 @@ def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): """input spec with mandatory inputs""" task = SimpleTaskXor() - task.inputs.input_1 = "Input1" - task.inputs.input_2 = True - task.inputs.input_3 = False + task.spec.input_1 = "Input1" + task.spec.input_2 = True + task.spec.input_3 = False with pytest.raises(Exception) as excinfo: - task.inputs.check_fields_input_spec() + task.spec.check_fields_input_spec() assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( excinfo.value ) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 8699eb1711..4a481e9d92 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -57,7 +57,7 @@ def test_numpy(): fft = mark.annotate({"a": np.ndarray, "return": np.ndarray})(np.fft.fft) fft = mark.task(fft)() arr = np.array([[1, 10], [2, 20]]) - fft.inputs.a = arr + fft.spec.a = arr res = fft() assert np.allclose(np.fft.fft(arr), res.output.out) @@ -1319,7 +1319,7 @@ def test_shell_cmd(tmpdir): # separate command into exec + args shelly = ShellTask(executable=cmd[0], args=cmd[1:]) - assert shelly.inputs.executable == "echo" + assert shelly.spec.executable == "echo" assert shelly.cmdline == " ".join(cmd) res = shelly._run() assert res.output.return_code == 0 diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 
36efa95af1..189fc0cebc 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -1,11 +1,14 @@ import typing as ty -from copy import deepcopy +from copy import deepcopy, copy from enum import Enum +from pathlib import Path import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy -from ..specs import TaskSpec, Outputs -from ..helpers import ensure_list, attrs_values, is_lazy +from ..specs import TaskSpec, Outputs, WorkflowSpec +from ..task import Task +from ..helpers import ensure_list, attrs_values, is_lazy, load_result, create_checksum +from pydra.utils.hash import hash_function from .. import helpers_state as hlpst from ..state import State @@ -273,6 +276,94 @@ def combiner(self): return () return self._state.combiner + def _get_tasks( + self, + cache_locations: Path | list[Path], + state_index: int | None = None, + return_inputs: bool = False, + ) -> list["Task"]: + raise NotImplementedError + if self.state: + if state_index is None: + # if state_index=None, collecting all results + if self.state.combiner: + return self._combined_output(return_inputs=return_inputs) + else: + results = [] + for ind in range(len(self.state.inputs_ind)): + checksum = self.checksum_states(state_index=ind) + result = load_result(checksum, cache_locations) + if result is None: + return None + results.append(result) + if return_inputs is True or return_inputs == "val": + return list(zip(self.state.states_val, results)) + elif return_inputs == "ind": + return list(zip(self.state.states_ind, results)) + else: + return results + else: # state_index is not None + if self.state.combiner: + return self._combined_output(return_inputs=return_inputs)[ + state_index + ] + result = load_result(self.checksum_states(state_index), cache_locations) + if return_inputs is True or return_inputs == "val": + return (self.state.states_val[state_index], result) + elif return_inputs == "ind": + return (self.state.states_ind[state_index], result) + else: + return result + else: + return load_result(self._spec._checksum, cache_locations) + + def _checksum_states(self, state_index=None): + """ + Calculate a checksum for the specific state or all of the states of the task. + Replaces state-arrays in the inputs fields with a specific values for states. 
+ Used to recreate names of the task directories, + + Parameters + ---------- + state_index : + TODO + + """ + # if is_workflow(self) and self.spec._graph_checksums is attr.NOTHING: + # self.spec._graph_checksums = { + # nd.name: nd.checksum for nd in self.graph_sorted + # } + + if state_index is not None: + inputs_copy = copy(self.spec) + for key, ind in self.state.inputs_ind[state_index].items(): + val = self._extract_input_el( + inputs=self.spec, inp_nm=key.split(".")[1], ind=ind + ) + setattr(inputs_copy, key.split(".")[1], val) + # setting files_hash again in case it was cleaned by setting specific element + # that might be important for outer splitter of input variable with big files + # the file can be changed with every single index even if there are only two files + input_hash = inputs_copy.hash + if isinstance(self._spec, WorkflowSpec): + con_hash = hash_function(self._connections) + # TODO: hash list is not used + hash_list = [input_hash, con_hash] # noqa: F841 + checksum_ind = create_checksum( + self.__class__.__name__, self._checksum_wf(input_hash) + ) + else: + checksum_ind = create_checksum(self.__class__.__name__, input_hash) + return checksum_ind + else: + checksum_list = [] + if not hasattr(self.state, "inputs_ind"): + self.state.prepare_states(self.spec, cont_dim=self.cont_dim) + self.state.prepare_inputs() + for ind in range(len(self.state.inputs_ind)): + checksum_list.append(self._checksum_states(state_index=ind)) + return checksum_list + def _check_if_outputs_have_been_used(self, msg): used = [] if self._lzout: diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index ee21d26db3..2ce2efd1ff 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1042,3 +1042,14 @@ def is_fileset_or_union(type_: type) -> bool: if is_union(type_): return any(is_fileset_or_union(t) for t in ty.get_args(type_)) return issubclass(type_, core.FileSet) + + +def is_type(*args: ty.Any) -> bool: + """check that the value is a type or generic""" + if len(args) == 3: # attrs validator + val = args[2] + elif len(args) != 1: + raise TypeError(f"is_type() takes 1 or 3 arguments, not {args}") + else: + val = args[0] + return inspect.isclass(val) or ty.get_origin(val) From 7df9a2bbbbe136bf7d4f1b52ace48752e8be9050 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 13 Dec 2024 16:47:36 +1100 Subject: [PATCH 066/342] implemented python task execution --- pydra/design/tests/test_python.py | 164 +++++++++++++++--------------- pydra/engine/core.py | 2 +- pydra/engine/specs.py | 31 +++--- pydra/engine/task.py | 5 +- 4 files changed, 98 insertions(+), 104 deletions(-) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 58f99e1317..302c88d14f 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -17,20 +17,21 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.define(func) + SampleSpec = python.define(func) - assert issubclass(SampleInterface, PythonSpec) - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="out", type=float)] - SampleInterface(a=1) - 
SampleInterface.Outputs(out=2.0) + spec = SampleSpec(a=1) + result = spec() + assert result.output.out == 2.0 with pytest.raises(TypeError): - SampleInterface(a=1.5) + SampleSpec(a=1.5) def test_interface_wrap_function_with_default(): @@ -38,20 +39,19 @@ def func(a: int, k: float = 2.0) -> float: """Sample function with inputs and outputs""" return a * k - SampleInterface = python.define(func) + SampleSpec = python.define(func) - assert issubclass(SampleInterface, PythonSpec) - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, default=func), python.arg(name="k", type=float, default=2.0), ] assert outputs == [python.out(name="out", type=float)] - SampleInterface(a=1) - SampleInterface(a=10, k=3.0) - SampleInterface.Outputs(out=2.0) + assert SampleSpec(a=1)().output.out == 2.0 + assert SampleSpec(a=10, k=3.0)().output.out == 30.0 def test_interface_wrap_function_overrides(): @@ -59,15 +59,15 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.define( + SampleSpec = python.define( func, inputs={"a": python.arg(help_string="The argument to be doubled")}, outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, ) - assert issubclass(SampleInterface, PythonSpec) - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="The argument to be doubled"), python.arg(name="function", type=ty.Callable, default=func), @@ -75,7 +75,7 @@ def func(a: int) -> float: assert outputs == [ python.out(name="b", type=Decimal, help_string="the doubled output"), ] - outputs = SampleInterface.Outputs(b=Decimal(2.0)) + outputs = SampleSpec.Outputs(b=Decimal(2.0)) assert isinstance(outputs.b, Decimal) @@ -84,84 +84,84 @@ def func(a: int) -> int: """Sample function with inputs and outputs""" return a * 2 - SampleInterface = python.define( + SampleSpec = python.define( func, inputs={"a": float}, outputs={"b": float}, ) - assert issubclass(SampleInterface, PythonSpec) - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=float), python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="b", type=float)] - intf = SampleInterface(a=1) + intf = SampleSpec(a=1) assert isinstance(intf.a, float) - outputs = SampleInterface.Outputs(b=2.0) + outputs = SampleSpec.Outputs(b=2.0) assert isinstance(outputs.b, float) def test_decorated_function_interface(): @python.define(outputs=["c", "d"]) - def SampleInterface(a: int, b: float) -> tuple[float, float]: + def SampleSpec(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" return a + b, a * b - assert 
issubclass(SampleInterface, PythonSpec) - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="b", type=float), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float), python.out(name="d", type=float), ] - assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" - SampleInterface.Outputs(c=1.0, d=2.0) + assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" + SampleSpec.Outputs(c=1.0, d=2.0) def test_interface_with_function_implicit_outputs_from_return_stmt(): @python.define - def SampleInterface(a: int, b: float) -> tuple[float, float]: + def SampleSpec(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" c = a + b d = a * b return c, d - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="b", type=float), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float), python.out(name="d", type=float), ] - assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" - SampleInterface.Outputs(c=1.0, d=2.0) + assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" + SampleSpec.Outputs(c=1.0, d=2.0) def test_interface_with_function_docstr(): @python.define(outputs=["c", "d"]) - def SampleInterface(a: int, b: float) -> tuple[float, float]: + def SampleSpec(a: int, b: float) -> tuple[float, float]: """Sample function for testing :param a: First input to be inputted @@ -171,28 +171,28 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="product of a and b"), ] - assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" + assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" def 
test_interface_with_function_google_docstr(): @python.define(outputs=["c", "d"]) - def SampleInterface(a: int, b: float) -> tuple[float, float]: + def SampleSpec(a: int, b: float) -> tuple[float, float]: """Sample function for testing Args: @@ -206,30 +206,30 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" + assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" def test_interface_with_function_numpy_docstr(): @python.define( outputs=["c", "d"] ) # Could potentiall read output names from doc-string instead - def SampleInterface(a: int, b: float) -> tuple[float, float]: + def SampleSpec(a: int, b: float) -> tuple[float, float]: """Sample function for testing Parameters @@ -249,28 +249,28 @@ def SampleInterface(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert attrs.fields(SampleInterface).function.default.__name__ == "SampleInterface" + assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" def test_interface_with_class(): @python.define - class SampleInterface: + class SampleSpec: """Sample class for testing Args: @@ -296,32 +296,32 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, PythonSpec) - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert issubclass(SampleSpec, PythonSpec) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, default=2.0, help_string="Second input"), python.arg( name="function", 
type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleInterface.function.__name__ == "function" - SampleInterface(a=1) - SampleInterface(a=1, b=2.0) - SampleInterface.Outputs(c=1.0, d=2.0) + assert SampleSpec.function.__name__ == "function" + SampleSpec(a=1) + SampleSpec(a=1, b=2.0) + SampleSpec.Outputs(c=1.0, d=2.0) def test_interface_with_inheritance(): @python.define - class SampleInterface(PythonSpec["SampleInterface.Outputs"]): + class SampleSpec(PythonSpec["SampleSpec.Outputs"]): """Sample class for testing Args: @@ -347,12 +347,12 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleInterface, PythonSpec) + assert issubclass(SampleSpec, PythonSpec) def test_interface_with_class_no_auto_attribs(): @python.define(auto_attribs=False) - class SampleInterface: + class SampleSpec: a: int = python.arg(help_string="First input to be inputted") b: float = python.arg(help_string="Second input") @@ -368,36 +368,36 @@ class Outputs: def function(a, b): return a + b, a * b - assert SampleInterface.Task is PythonTask - inputs = sorted(list_fields(SampleInterface), key=sort_key) - outputs = sorted(list_fields(SampleInterface.Outputs), key=sort_key) + assert SampleSpec.Task is PythonTask + inputs = sorted(list_fields(SampleSpec), key=sort_key) + outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleInterface).function.default, + default=attrs.fields(SampleSpec).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleInterface.function.__name__ == "function" - SampleInterface(a=1, b=2.0) - SampleInterface.Outputs(c=1.0, d=2.0) + assert SampleSpec.function.__name__ == "function" + SampleSpec(a=1, b=2.0) + SampleSpec.Outputs(c=1.0, d=2.0) with pytest.raises(TypeError): - SampleInterface(a=1, b=2.0, x=3) + SampleSpec(a=1, b=2.0, x=3) with pytest.raises(TypeError): - SampleInterface.Outputs(c=1.0, d=2.0, y="hello") + SampleSpec.Outputs(c=1.0, d=2.0, y="hello") def test_interface_invalid_wrapped1(): with pytest.raises(ValueError): @python.define(inputs={"a": python.arg()}) - class SampleInterface(PythonSpec["SampleInterface.Outputs"]): + class SampleSpec(PythonSpec["SampleSpec.Outputs"]): a: int class Outputs: @@ -412,7 +412,7 @@ def test_interface_invalid_wrapped2(): with pytest.raises(ValueError): @python.define(outputs={"b": python.out()}) - class SampleInterface(PythonSpec["SampleInterface.Outputs"]): + class SampleSpec(PythonSpec["SampleSpec.Outputs"]): a: int class Outputs: diff --git a/pydra/engine/core.py b/pydra/engine/core.py index f74264dfb8..e421e2fff8 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -157,7 +157,7 @@ def __init__( raise ValueError(f"Unknown input set {inputs!r}") inputs = self._input_sets[inputs] - self.spec = attr.evolve(self.spec, **inputs) + self.spec = attr.evolve(self.spec, **inputs) # checking if metadata is set properly self.spec._check_resolved() diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py 
index b026c342cc..165c1cd94d 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -13,6 +13,7 @@ from copy import deepcopy from typing_extensions import Self import attrs +import cloudpickle as cp from fileformats.generic import File from pydra.engine.audit import AuditFlag from pydra.utils.typing import TypeParser, MultiOutputObj @@ -60,7 +61,12 @@ def from_task(cls, task: "Task") -> Self: outputs : Outputs The outputs of the task """ - return cls(**{f.name: attrs.NOTHING for f in attrs_fields(cls)}) + return cls( + **{ + f.name: task.output_.get(f.name, attrs.NOTHING) + for f in attrs_fields(cls) + } + ) @property def inputs(self): @@ -319,26 +325,19 @@ class Runtime: class Result: """Metadata regarding the outputs of processing.""" - output: ty.Optional[ty.Any] = None - runtime: ty.Optional[Runtime] = None + output: Outputs | None = None + runtime: Runtime | None = None errored: bool = False def __getstate__(self): state = attrs_values(self) if state["output"] is not None: - fields = tuple((el.name, el.type) for el in attrs_fields(state["output"])) - state["output_spec"] = (state["output"].__class__.__name__, fields) - state["output"] = attrs.asdict(state["output"], recurse=False) + state["output"] = cp.dumps(state["output"]) return state def __setstate__(self, state): - if "output_spec" in state: - spec = list(state["output_spec"]) - del state["output_spec"] - klass = attrs.make_class( - spec[0], {k: attrs.field(type=v) for k, v in list(spec[1])} - ) - state["output"] = klass(**state["output"]) + if state["output"] is not None: + state["output"] = cp.loads(state["output"]) self.__dict__.update(state) def get_output_field(self, field_name): @@ -441,11 +440,7 @@ def from_task( The outputs of the shell process """ - outputs = cls( - return_code=task.output_["return_code"], - stdout=task.output_["stdout"], - stderr=task.output_["stderr"], - ) + outputs = super().from_task(task) fld: shell.out for fld in list_fields(cls): if fld.name in ["return_code", "stdout", "stderr"]: diff --git a/pydra/engine/task.py b/pydra/engine/task.py index ab226e8ba4..36e41e0e70 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -43,7 +43,6 @@ import attr from pathlib import Path -import cloudpickle as cp from fileformats.core import FileSet from .core import Task from pydra.utils.messenger import AuditFlag @@ -67,9 +66,9 @@ class PythonTask(Task): def _run_task(self, environment=None): inputs = attrs_values(self.spec) - del inputs["_func"] + del inputs["function"] self.output_ = None - output = cp.loads(self.spec._func)(**inputs) + output = self.spec.function(**inputs) output_names = [f.name for f in attr.fields(self.spec.Outputs)] if output is None: self.output_ = {nm: None for nm in output_names} From 860002fffc3b16b413b5de60bf3e4243d5c9e40a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 14:28:09 +1100 Subject: [PATCH 067/342] moved boutiques into design pacakge --- pydra/design/boutiques.py | 223 +++++++++++++++++++++++++++ pydra/engine/boutiques.py | 213 ------------------------- pydra/engine/task.py | 43 +++++- pydra/engine/tests/test_boutiques.py | 110 ++++++------- 4 files changed, 308 insertions(+), 281 deletions(-) create mode 100644 pydra/design/boutiques.py delete mode 100644 pydra/engine/boutiques.py diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py new file mode 100644 index 0000000000..6877bf4822 --- /dev/null +++ b/pydra/design/boutiques.py @@ -0,0 +1,223 @@ +import typing as ty +import json +import tempfile +from urllib.request import 
urlretrieve +from pathlib import Path +from functools import reduce +from fileformats.generic import File +from pydra.engine.specs import ShellSpec +from pydra.engine.task import BoshTask +from .base import make_task_spec +from . import shell + + +class arg(shell.arg): + """Class for input fields of Boutiques task specifications + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is EMPTY + help_string: str + A short description of the input field. + allowed_values: list, optional + List of allowed values for the field. + requires: list, optional + Names of the inputs that are required together with the field. + xor: list[str], optional + Names of the inputs that are mutually exclusive with the field. + copy_mode: File.CopyMode, optional + The mode of copying the file, by default it is File.CopyMode.any + copy_collation: File.CopyCollation, optional + The collation of the file, by default it is File.CopyCollation.any + copy_ext_decomp: File.ExtensionDecomposition, optional + The extension decomposition of the file, by default it is + File.ExtensionDecomposition.single + readonly: bool, optional + If True the input field can’t be provided by the user but it aggregates other + input fields (for example the fields with argstr: -o {fldA} {fldB}), by default + it is False + """ + + +class out(shell.out): + """Class for output fields of Boutiques task specifications + + Parameters + ---------- + name: str, optional + The name of the field, used when specifying a list of fields instead of a mapping + from name to field, by default it is None + type: type, optional + The type of the field, by default it is Any + default : Any, optional + the default value for the field, by default it is EMPTY + help_string: str, optional + A short description of the input field. + requires: list, optional + Names of the inputs that are required together with the field. + converter: callable, optional + The converter for the field passed through to the attrs.field, by default it is None + validator: callable | iterable[callable], optional + The validator(s) for the field passed through to the attrs.field, by default it is None + """ + + +def define( + zenodo_id=None, + bosh_file=None, + input_spec_names: list[str] | None = None, + output_spec_names: list[str] | None = None, +): + """ + Initialize this task. + + Parameters + ---------- + zenodo_id: :obj: str + Zenodo ID + bosh_file : : str + json file with the boutiques descriptors + audit_flags : :obj:`pydra.utils.messenger.AuditFlag` + Auditing configuration + cache_dir : :obj:`os.pathlike` + Cache directory + input_spec_names : :obj: list + Input names for input_spec. + messenger_args : + TODO + messengers : + TODO + name : :obj:`str` + Name of this task. + output_spec_names : :obj: list + Output names for output_spec. 
+ strip : :obj:`bool` + TODO + + """ + if (bosh_file and zenodo_id) or not (bosh_file or zenodo_id): + raise Exception("either bosh or zenodo_id has to be specified") + elif zenodo_id: + bosh_file = _download_spec(zenodo_id) + + with bosh_file.open() as f: + bosh_spec = json.load(f) + + inputs, input_keys = _prepare_input_spec(bosh_spec, names_subset=input_spec_names) + outputs = _prepare_output_spec( + bosh_spec, input_keys, names_subset=output_spec_names + ) + return make_task_spec( + spec_type=ShellSpec, + task_type=BoshTask, + out_type=out, + arg_type=arg, + inputs=inputs, + outputs=outputs, + ) + + +def _download_spec(zenodo_id): + """ + using boutiques Searcher to find url of zenodo file for a specific id, + and download the file to self.cache_dir + """ + from boutiques.searcher import Searcher + + tmp_dir = Path(tempfile.mkdtemp()) + + searcher = Searcher(zenodo_id, exact_match=True) + hits = searcher.zenodo_search().json()["hits"]["hits"] + if len(hits) == 0: + raise Exception(f"can't find zenodo spec for {zenodo_id}") + elif len(hits) > 1: + raise Exception(f"too many hits for {zenodo_id}") + else: + zenodo_url = hits[0]["files"][0]["links"]["self"] + zenodo_file = tmp_dir / f"zenodo.{zenodo_id}.json" + urlretrieve(zenodo_url, zenodo_file) + return zenodo_file + + +def _prepare_input_spec(bosh_spec: dict[str, ty.Any], names_subset=None): + """creating input spec from the zenodo file + if name_subset provided, only names from the subset will be used in the spec + """ + binputs = bosh_spec["inputs"] + input_keys = {} + fields = [] + for input in binputs: + name = input["id"] + if names_subset is None: + pass + elif name not in names_subset: + continue + else: + names_subset.remove(name) + if input["type"] == "File": + tp = File + elif input["type"] == "String": + tp = str + elif input["type"] == "Number": + tp = float + elif input["type"] == "Flag": + tp = bool + else: + tp = None + # adding list + if tp and "list" in input and input["list"]: + tp = ty.List[tp] + + fields.append( + arg( + name=name, + type=tp, + help_string=input.get("description", None) or input["name"], + mandatory=not input["optional"], + argstr=input.get("command-line-flag", None), + ) + ) + input_keys[input["value-key"]] = "{" + f"{name}" + "}" + if names_subset: + raise RuntimeError(f"{names_subset} are not in the zenodo input spec") + return fields, input_keys + + +def _prepare_output_spec(bosh_spec: dict[str, ty.Any], input_keys, names_subset=None): + """creating output spec from the zenodo file + if name_subset provided, only names from the subset will be used in the spec + """ + boutputs = bosh_spec["output-files"] + fields = [] + for output in boutputs: + name = output["id"] + if names_subset is None: + pass + elif name not in names_subset: + continue + else: + names_subset.remove(name) + path_template = reduce( + lambda s, r: s.replace(*r), + input_keys.items(), + output["path-template"], + ) + fields.append( + out( + name=name, + type=File, + help_string=output.get("description", None) or output["name"], + mandatory=not output["optional"], + output_file_template=path_template, + ) + ) + + if names_subset: + raise RuntimeError(f"{names_subset} are not in the zenodo output spec") + return fields diff --git a/pydra/engine/boutiques.py b/pydra/engine/boutiques.py deleted file mode 100644 index d12d30f1d4..0000000000 --- a/pydra/engine/boutiques.py +++ /dev/null @@ -1,213 +0,0 @@ -import typing as ty -import json -import attr -from urllib.request import urlretrieve -from pathlib import Path -from functools 
import reduce - -from pydra.utils.messenger import AuditFlag -from pydra.engine.task import ShellTask -from pydra.engine.specs import SpecInfo, ShellSpec, ShellOutputs, File, attrs_fields -from .helpers_file import is_local_file - - -class BoshTask(ShellTask): - """Shell Command Task based on the Boutiques descriptor""" - - def __init__( - self, - zenodo_id=None, - bosh_file=None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - input_spec_names: ty.Optional[ty.List] = None, - messenger_args=None, - messengers=None, - name=None, - output_spec_names: ty.Optional[ty.List] = None, - rerun=False, - strip=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - zenodo_id: :obj: str - Zenodo ID - bosh_file : : str - json file with the boutiques descriptors - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - input_spec_names : :obj: list - Input names for input_spec. - messenger_args : - TODO - messengers : - TODO - name : :obj:`str` - Name of this task. - output_spec_names : :obj: list - Output names for output_spec. - strip : :obj:`bool` - TODO - - """ - self.cache_dir = cache_dir - if (bosh_file and zenodo_id) or not (bosh_file or zenodo_id): - raise Exception("either bosh or zenodo_id has to be specified") - elif zenodo_id: - self.bosh_file = self._download_spec(zenodo_id) - else: # bosh_file - self.bosh_file = bosh_file - - with self.bosh_file.open() as f: - self.bosh_spec = json.load(f) - - self.input_spec = self._prepare_input_spec(names_subset=input_spec_names) - self.output_spec = self._prepare_output_spec(names_subset=output_spec_names) - self.bindings = ["-v", f"{self.bosh_file.parent}:{self.bosh_file.parent}:ro"] - - super().__init__( - name=name, - input_spec=self.input_spec, - output_spec=self.output_spec, - executable=["bosh", "exec", "launch"], - args=["-s"], - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=self.cache_dir, - strip=strip, - rerun=rerun, - **kwargs, - ) - self.strip = strip - - def _download_spec(self, zenodo_id): - """ - using boutiques Searcher to find url of zenodo file for a specific id, - and download the file to self.cache_dir - """ - from boutiques.searcher import Searcher - - searcher = Searcher(zenodo_id, exact_match=True) - hits = searcher.zenodo_search().json()["hits"]["hits"] - if len(hits) == 0: - raise Exception(f"can't find zenodo spec for {zenodo_id}") - elif len(hits) > 1: - raise Exception(f"too many hits for {zenodo_id}") - else: - zenodo_url = hits[0]["files"][0]["links"]["self"] - zenodo_file = self.cache_dir / f"zenodo.{zenodo_id}.json" - urlretrieve(zenodo_url, zenodo_file) - return zenodo_file - - def _prepare_input_spec(self, names_subset=None): - """creating input spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec - """ - binputs = self.bosh_spec["inputs"] - self._input_spec_keys = {} - fields = [] - for input in binputs: - name = input["id"] - if names_subset is None: - pass - elif name not in names_subset: - continue - else: - names_subset.remove(name) - if input["type"] == "File": - tp = File - elif input["type"] == "String": - tp = str - elif input["type"] == "Number": - tp = float - elif input["type"] == "Flag": - tp = bool - else: - tp = None - # adding list - if tp and "list" in input and input["list"]: - tp = ty.List[tp] - - mdata = { - "help_string": input.get("description", None) or input["name"], - 
"mandatory": not input["optional"], - "argstr": input.get("command-line-flag", None), - } - fields.append((name, tp, mdata)) - self._input_spec_keys[input["value-key"]] = "{" + f"{name}" + "}" - if names_subset: - raise RuntimeError(f"{names_subset} are not in the zenodo input spec") - spec = SpecInfo(name="Inputs", fields=fields, bases=(ShellSpec,)) - return spec - - def _prepare_output_spec(self, names_subset=None): - """creating output spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec - """ - boutputs = self.bosh_spec["output-files"] - fields = [] - for output in boutputs: - name = output["id"] - if names_subset is None: - pass - elif name not in names_subset: - continue - else: - names_subset.remove(name) - path_template = reduce( - lambda s, r: s.replace(*r), - self._input_spec_keys.items(), - output["path-template"], - ) - mdata = { - "help_string": output.get("description", None) or output["name"], - "mandatory": not output["optional"], - "output_file_template": path_template, - } - fields.append((name, attr.ib(type=File, metadata=mdata))) - - if names_subset: - raise RuntimeError(f"{names_subset} are not in the zenodo output spec") - spec = SpecInfo(name="Outputs", fields=fields, bases=(ShellOutputs,)) - return spec - - def _command_args_single(self, state_ind=None, index=None): - """Get command line arguments for a single state""" - input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) - cmd_list = ( - self.spec.executable - + [str(self.bosh_file), input_filepath] - + self.spec.args - + self.bindings - ) - return cmd_list - - def _bosh_invocation_file(self, state_ind=None, index=None): - """creating bosh invocation file - json file with inputs values""" - input_json = {} - for f in attrs_fields(self.spec, exclude_names=("executable", "args")): - if self.state and f"{self.name}.{f.name}" in state_ind: - value = getattr(self.spec, f.name)[state_ind[f"{self.name}.{f.name}"]] - else: - value = getattr(self.spec, f.name) - # adding to the json file if specified by the user - if value is not attr.NOTHING and value != "NOTHING": - if is_local_file(f): - value = Path(value) - self.bindings.extend(["-v", f"{value.parent}:{value.parent}:ro"]) - value = str(value) - - input_json[f.name] = value - - filename = self.cache_dir / f"{self.name}-{index}.json" - with open(filename, "w") as jsonfile: - json.dump(input_json, jsonfile) - - return str(filename) diff --git a/pydra/engine/task.py b/pydra/engine/task.py index 36e41e0e70..d2acc4ef74 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -41,7 +41,8 @@ from __future__ import annotations -import attr +import attrs +import json from pathlib import Path from fileformats.core import FileSet from .core import Task @@ -55,6 +56,7 @@ attrs_values, parse_copyfile, ) +from pydra.engine.helpers_file import is_local_file from pydra.utils.typing import TypeParser from .environments import Native @@ -69,7 +71,7 @@ def _run_task(self, environment=None): del inputs["function"] self.output_ = None output = self.spec.function(**inputs) - output_names = [f.name for f in attr.fields(self.spec.Outputs)] + output_names = [f.name for f in attrs.fields(self.spec.Outputs)] if output is None: self.output_ = {nm: None for nm in output_names} elif len(output_names) == 1: @@ -202,3 +204,40 @@ def _prepare_bindings(self, root: str): ) DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent + + +class BoshTask(ShellTask): + + def _command_args_single(self, state_ind=None, 
index=None): + """Get command line arguments for a single state""" + input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) + cmd_list = ( + self.spec.executable + + [str(self.bosh_file), input_filepath] + + self.spec.args + + self.bindings + ) + return cmd_list + + def _bosh_invocation_file(self, state_ind=None, index=None): + """creating bosh invocation file - json file with inputs values""" + input_json = {} + for f in attrs_fields(self.spec, exclude_names=("executable", "args")): + if self.state and f"{self.name}.{f.name}" in state_ind: + value = getattr(self.spec, f.name)[state_ind[f"{self.name}.{f.name}"]] + else: + value = getattr(self.spec, f.name) + # adding to the json file if specified by the user + if value is not attrs.NOTHING and value != "NOTHING": + if is_local_file(f): + value = Path(value) + self.bindings.extend(["-v", f"{value.parent}:{value.parent}:ro"]) + value = str(value) + + input_json[f.name] = value + + filename = self.cache_dir / f"{self.name}-{index}.json" + with open(filename, "w") as jsonfile: + json.dump(input_json, jsonfile) + + return str(filename) diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index c951091887..d56f6d5809 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -2,12 +2,9 @@ import subprocess as sp import attr import pytest - -from ..core import Workflow -from ..task import ShellTask -from ..submitter import Submitter -from ..boutiques import BoshTask +from pydra.engine.helpers import attrs_values from .utils import result_no_submitter, result_submitter, no_win +from pydra.design import workflow, boutiques, shell need_bosh_docker = pytest.mark.skipif( shutil.which("docker") is None @@ -28,11 +25,10 @@ @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir): """simple task to run fsl.bet using BoshTask""" - btask = BoshTask(name="NA", zenodo_id="1482743") - btask.spec.infile = data_tests_dir / "test.nii.gz" - btask.spec.maskfile = maskfile - btask.cache_dir = tmpdir - res = results_function(btask, plugin) + btask = boutiques.define(zenodo_id="1482743") + btask.infile = data_tests_dir / "test.nii.gz" + btask.maskfile = maskfile + res = btask(plugin, cache_dir=tmpdir) assert res.output.return_code == 0 @@ -48,13 +44,13 @@ def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir) @pytest.mark.flaky(reruns=3) def test_boutiques_spec_1(data_tests_dir): """testing spec: providing input/output fields names""" - btask = BoshTask( - name="NA", + btask = boutiques.define( zenodo_id="1482743", - infile=data_tests_dir / "test.nii.gz", - maskfile="test_brain.nii.gz", input_spec_names=["infile", "maskfile"], output_spec_names=["outfile", "out_outskin_off"], + )( + infile=data_tests_dir / "test.nii.gz", + maskfile="test_brain.nii.gz", ) assert len(btask.input_spec.fields) == 2 @@ -73,17 +69,16 @@ def test_boutiques_spec_1(data_tests_dir): @pytest.mark.flaky(reruns=3) def test_boutiques_spec_2(data_tests_dir): """testing spec: providing partial input/output fields names""" - btask = BoshTask( - name="NA", - zenodo_id="1482743", + btask = boutiques.define( + zenodo_id="1482743", input_spec_names=["infile"], output_spec_names=[] + )( infile=data_tests_dir / "test.nii.gz", maskfile="test_brain.nii.gz", - input_spec_names=["infile"], - output_spec_names=[], ) - assert len(btask.input_spec.fields) == 1 - assert 
btask.input_spec.fields[0][0] == "infile" + fields = attrs_values(btask) + assert len(fields) == 1 + assert fields[0][0] == "infile" assert hasattr(btask.spec, "infile") # input doesn't see maskfile assert not hasattr(btask.spec, "maskfile") @@ -99,24 +94,19 @@ def test_boutiques_spec_2(data_tests_dir): ) def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): """wf with one task that runs fsl.bet using BoshTask""" - wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) - wf.inputs.maskfile = maskfile - wf.inputs.infile = infile - wf.cache_dir = tmpdir - - wf.add( - BoshTask( - name="bet", - zenodo_id="1482743", - infile=wf.lzin.infile, - maskfile=wf.lzin.maskfile, + + def Workflow(maskfile, infile): + bet = workflow.add( + boutiques.define(zenodo_id="1482743")( + infile=infile, + maskfile=maskfile, + ) ) - ) - wf.set_output([("outfile", wf.bet.lzout.outfile)]) + return bet.outfile - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(maskfile=maskfile, infile=infile) + wf(plugin=plugin, cache_dir=tmpdir) res = wf.result() assert res.output.outfile.name == "test_brain.nii.gz" @@ -132,39 +122,27 @@ def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): ) def test_boutiques_wf_2(maskfile, plugin, tmpdir, infile): """wf with two BoshTasks (fsl.bet and fsl.stats) and one ShellTask""" - wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) - wf.inputs.maskfile = maskfile - wf.inputs.infile = infile - wf.cache_dir = tmpdir - - wf.add( - BoshTask( - name="bet", - zenodo_id="1482743", - infile=wf.lzin.infile, - maskfile=wf.lzin.maskfile, + + @workflow.define(outputs=["outfile_bet", "out_stat", "out"]) + def Workflow(maskfile, infile): + + bet = workflow.add( + boutiques.define(zenodo_id="1482743")( + infile=infile, + maskfile=maskfile, + ) ) - ) - # used to be "3240521", but can't access anymore - wf.add( - BoshTask( - name="stat", zenodo_id="4472771", input_file=wf.bet.lzout.outfile, v=True + # used to be "3240521", but can't access anymore + stat = workflow.add( + boutiques.define(zenodo_id="4472771")( + input_file=bet.outfile, + v=True, + ) ) - ) - wf.add(ShellTask(name="cat", executable="cat", args=wf.stat.lzout.output)) - - wf.set_output( - [ - ("outfile_bet", wf.bet.lzout.outfile), - ("out_stat", wf.stat.lzout.output), - ("out", wf.cat.lzout.stdout), - ] - ) + cat = workflow.add(shell.define("cat ")(file=stat.output)) + return bet.outfile, stat.output, cat.stdout - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() + res = Workflow(maskfile=maskfile, infile=infile)(plugin=plugin, cache_dir=tmpdir) assert res.output.outfile_bet.name == "test_brain.nii.gz" assert res.output.outfile_bet.exists() From 5d08b0e4fc8622c95d54d806d553b86e37116a12 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 14:32:56 +1100 Subject: [PATCH 068/342] added typing to Result to specify output type --- pydra/engine/specs.py | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 165c1cd94d..01521a73d0 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -168,7 +168,37 @@ def __call__( messengers=None, rerun=False, **kwargs, - ): + ) -> "Result[OutputsType]": + """Create a task from this specification and execute it to produce a result. 
+ + Parameters + ---------- + name : str, optional + The name of the task, by default None + audit_flags : AuditFlag, optional + Auditing configuration, by default AuditFlag.NONE + cache_dir : os.PathLike, optional + Cache directory, by default None + cache_locations : list[os.PathLike], optional + Cache locations, by default None + inputs : str or File or dict, optional + Inputs for the task, by default None + cont_dim : dict, optional + Container dimensions for specific inputs, by default None + messenger_args : dict, optional + Messenger arguments, by default None + messengers : list, optional + Messengers, by default None + rerun : bool, optional + Whether to rerun the task, by default False + **kwargs + Additional keyword arguments to pass to the task + + Returns + ------- + Result + The result of the task + """ self._check_rules() task = self.Task( spec=self, @@ -322,10 +352,10 @@ class Runtime: @attrs.define(kw_only=True) -class Result: +class Result(ty.Generic[OutputsType]): """Metadata regarding the outputs of processing.""" - output: Outputs | None = None + output: OutputsType | None = None runtime: Runtime | None = None errored: bool = False From 70d0e068d3509c7560e6bbc9a287f1a7c8e13ad5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 15:58:13 +1100 Subject: [PATCH 069/342] reworked dockertask test --- pydra/engine/tests/test_dockertask.py | 605 ++++++++++---------------- pydra/engine/tests/utils.py | 50 ++- 2 files changed, 250 insertions(+), 405 deletions(-) diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index cc196cd87c..d1ba4d62ff 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,12 +1,9 @@ -import typing as ty import pytest -import attr - from ..task import ShellTask from ..submitter import Submitter -from ..core import Workflow -from ..specs import ShellOutputs, SpecInfo, File, ShellSpec +from fileformats.generic import File from ..environments import Docker +from pydra.design import shell, workflow from .utils import no_win, need_docker, result_submitter, result_no_submitter @@ -17,7 +14,7 @@ def test_docker_1_nosubm(): no submitter """ cmd = "whoami" - docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) + docky = shell.define(cmd)(environment=Docker(image="busybox")) assert docky.environment.image == "busybox" assert docky.environment.tag == "latest" assert isinstance(docky.environment, Docker) @@ -35,7 +32,7 @@ def test_docker_1(plugin): using submitter """ cmd = "whoami" - docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) + docky = shell.define(cmd)(environment=Docker(image="busybox")) with Submitter(plugin=plugin) as sub: docky(submitter=sub) @@ -52,12 +49,12 @@ def test_docker_2(results_function, plugin): """a command with arguments, cmd and args given as executable with and without submitter """ - cmd = ["echo", "hail", "pydra"] - docky = ShellTask(name="docky", executable=cmd, environment=Docker(image="busybox")) + cmdline = "echo hail pydra" + docky = shell.define(cmdline)(environment=Docker(image="busybox")) # cmdline doesn't know anything about docker - assert docky.cmdline == " ".join(cmd) + assert docky.cmdline == cmdline res = results_function(docky, plugin) - assert res.output.stdout.strip() == " ".join(cmd[1:]) + assert res.output.stdout.strip() == " ".join(cmdline.split()[1:]) assert res.output.return_code == 0 @@ -117,23 +114,12 @@ def test_docker_outputspec_1(plugin, tmp_path): 
customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings """ - cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutputs,), + outputs = [shell.out(name="newfile", type=File, help_string="new file")] + docky = shell.define("touch newfile_tmp.txt", outputs=outputs)( + environment=Docker(image="ubuntu") ) - docky = ShellTask( - name="docky", - environment=Docker(image="ubuntu"), - executable=cmd, - output_spec=my_output_spec, - ) - - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) - res = docky.result() + res = docky(plugin=plugin) assert res.output.stdout == "" @@ -150,31 +136,20 @@ def test_docker_inputspec_1(tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, file=filename, - input_spec=my_input_spec, strip=True, ) @@ -194,26 +169,19 @@ def test_docker_inputspec_1a(tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - default=filename, - metadata={"position": 1, "argstr": "", "help_string": "input file"}, - ), - ) - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + default=filename, + position=1, + argstr="", + help_string="input file", + ) + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, strip=True, ) @@ -235,42 +203,27 @@ def test_docker_inputspec_2(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), - ), - ( - "file2", - attr.ib( - type=File, - default=filename_2, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( + inputs = [ + shell.arg( + name="file1", + type=File, + position=1, + argstr="", + help_string="input file 1", + ), + shell.arg( + name="file2", + type=File, + default=filename_2, + position=2, + argstr="", + help_string="input file 2", + ), + ] + docky = shell.define(cmd, inputs=inputs)( name="docky", environment=Docker(image="busybox"), - executable=cmd, file1=filename_1, - input_spec=my_input_spec, strip=True, ) @@ -293,43 +246,28 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): cmd = "cat" - # the field with default value can't be before value without default - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), - ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file1", + type=File, + default=filename_1, + 
position=1, + argstr="", + help_string="input file 1", + ), + shell.arg( + name="file2", + type=File, + mandatory=True, + position=2, + argstr="", + help_string="input file 2", + ), + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, file2=filename_2, - input_spec=my_input_spec, strip=True, ) assert docky.spec.file2.fspath == filename_2 @@ -354,43 +292,28 @@ def test_docker_inputspec_2a(plugin, tmp_path): cmd = "cat" - # if you want set default in the first field you can use default value - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help_string": "input file 1", - }, - ), - ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help_string": "input file 2", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help_string="input file 1", + ), + shell.arg( + name="file2", + type=File, + mandatory=True, + position=2, + argstr="", + help_string="input file 2", + ), + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, file2=filename_2, - input_spec=my_input_spec, strip=True, ) @@ -408,32 +331,21 @@ def test_docker_inputspec_3(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - "container_path": True, - }, - ), - ) - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + container_path=True, + ) + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, file=filename, - input_spec=my_input_spec, strip=True, ) @@ -456,41 +368,26 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help_string": "orig file", - "mandatory": True, - "copyfile": "copy", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help_string": "output file", - }, - ), - ), - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="orig_file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="orig file", + copyfile="copy", + ), + shell.arg( + name="out_file", + type=str, + output_file_template="{orig_file}", + help_string="output file", + ), + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, orig_file=str(file), ) @@ -498,7 +395,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): assert res.output.stdout == "" out_file = res.output.out_file.fspath assert out_file.exists() - # the file is copied, and than it is changed in place + # the file is copied, and then it is changed in place assert out_file.parent == docky.output_dir with open(out_file) as f: assert "hi from pydra\n" == f.read() @@ -522,34 +419,23 @@ def test_docker_inputspec_state_1(plugin, tmp_path): cmd = "cat" - 
my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, strip=True, - ).split("file", file=[str(filename_1), str(filename_2)]) + ) - res = docky() + res = docky(split={"file": [str(filename_1), str(filename_2)]}) assert res[0].output.stdout == "hello from pydra" assert res[1].output.stdout == "have a nice one" @@ -569,36 +455,23 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): f.write("have a nice one") cmd = "cat" - filename = [] - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ) - ], - bases=(ShellSpec,), - ) - docky = ShellTask( - name="docky", + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + docky = shell.define(cmd, inputs=inputs)( environment=Docker(image="busybox"), - executable=cmd, - input_spec=my_input_spec, strip=True, - ).split("file", file=[str(file_1), str(file_2)]) + ) - res = docky() + res = docky(split={"file": [str(file_1), str(file_2)]}) assert res[0].output.stdout == "hello from pydra" assert res[1].output.stdout == "have a nice one" @@ -613,43 +486,31 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + + @workflow.define + def Workflow(cmd, file): + + docky = workflow.add( + shell.define(cmd, inputs=inputs)( + file=file, + environment=Docker(image="busybox"), + strip=True, ) - ], - bases=(ShellSpec,), - ) + ) - wf = Workflow(name="wf", input_spec=["cmd", "file"]) - wf.inputs.cmd = cmd - wf.inputs.file = filename - - docky = ShellTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(docky) + return docky.stdout - wf.set_output([("out", wf.docky.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + wf = Workflow(cmd=cmd, file=filename) res = wf.result() assert res.output.out == "hello from pydra" @@ -668,45 +529,34 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + + @workflow.define + def Workflow(cmd, file): + + docky = workflow.add( + shell.define(cmd, inputs=inputs)( + environment=Docker(image="busybox"), + file=file, + strip=True, ) - ], - bases=(ShellSpec,), - ) - - wf = Workflow(name="wf", input_spec=["cmd", "file"]) 
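# A minimal sketch condensing the pattern of this rework: the imperative
# Workflow(...) / wf.add(...) / wf.set_output(...) construction is replaced by a
# @workflow.define-decorated function, and state splitting moves from
# wf.split(...) at build time to a split= argument at call time. Names and the
# file_1/file_2 paths below are illustrative placeholders taken from the
# surrounding hunks, not part of the patch itself.
from fileformats.generic import File
from pydra.design import shell, workflow
from pydra.engine.environments import Docker

inputs = [
    shell.arg(
        name="file", type=File, mandatory=True, position=1, argstr="",
        help_string="input file",
    )
]

@workflow.define
def CatWorkflow(cmd, file):
    docky = workflow.add(
        shell.define(cmd, inputs=inputs)(
            environment=Docker(image="busybox"), file=file, strip=True
        )
    )
    return docky.stdout

wf = CatWorkflow(cmd="cat")
res = wf(split={"file": [file_1, file_2]})  # was: wf.split(file=[...]) before calling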
- wf.split(file=[str(file_1), str(file_2)]) - wf.inputs.cmd = cmd + ) - docky = ShellTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(docky) + return docky.stdout - wf.set_output([("out", wf.docky.lzout.stdout)]) + wf = Workflow(cmd=cmd) - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) + res = wf(split={"file": [file_1, file_2]}) - res = wf.result() assert res[0].output.out == "hello from pydra" assert res[1].output.out == "have a nice one" @@ -724,42 +574,31 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): cmd = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), + inputs = [ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help_string="input file", + ) + ] + + @workflow.define + def Workflow(cmd, file): + + docky = workflow.add( + shell.define(cmd, inputs=inputs)( + environment=Docker(image="busybox"), + file=file, + strip=True, ) - ], - bases=(ShellSpec,), - ) + ) - wf = Workflow(name="wf", input_spec=["cmd", "file"]) - wf.inputs.cmd = cmd + return docky.stdout - docky = ShellTask( - name="docky", - environment=Docker(image="busybox"), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ).split("file", file=[str(file_1), str(file_2)]) - wf.add(docky) + wf = Workflow(cmd=cmd) - wf.set_output([("out", wf.docky.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() + res = wf(split={"file": [str(file_1), str(file_2)]}) assert res.output.out == ["hello from pydra", "have a nice one"] diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 55a4ccb164..64fe7591c3 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -1,6 +1,7 @@ # Tasks for testing import time -import sys, shutil +import sys +import shutil import typing as ty from pathlib import Path import functools @@ -8,9 +9,8 @@ import subprocess as sp import pytest from fileformats.generic import File - -from ..core import Workflow from ..submitter import Submitter +from pydra.design import workflow from pydra import mark @@ -294,12 +294,14 @@ def gen_basic_wf(name="basic-wf"): ----------- out : int (9) """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo(name="task1", a=wf.lzin.x, b=0)) - wf.add(fun_addvar(name="task2", a=wf.task1.lzout.out, b=2)) - wf.set_output([("out", wf.task2.lzout.out)]) - return wf + + @workflow.define(outputs=["out"]) + def Workflow(x): + task1 = workflow.add(fun_addtwo(a=x, b=0)) + task2 = workflow.add(fun_addvar(a=task1.out, b=2)) + return task2.out + + return Workflow(x=5) def gen_basic_wf_with_threadcount(name="basic-wf-with-threadcount"): @@ -314,12 +316,14 @@ def gen_basic_wf_with_threadcount(name="basic-wf-with-threadcount"): ----------- out : int (9) """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo_with_threadcount(name="task1", a=wf.lzin.x, sgeThreads=4)) - wf.add(fun_addvar(name="task2", a=wf.task1.lzout.out, b=2)) - wf.set_output([("out", wf.task2.lzout.out)]) - return wf + + @workflow.define(outputs=["out"]) + def Workflow(x): + task1 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=4)) + task2 = workflow.add(fun_addvar(a=task1.out, b=2)) + return task2.out + + return 
Workflow(x=5) def gen_basic_wf_with_threadcount_concurrent(name="basic-wf-with-threadcount"): @@ -334,13 +338,15 @@ def gen_basic_wf_with_threadcount_concurrent(name="basic-wf-with-threadcount"): ----------- out : int (9) """ - wf = Workflow(name=name, input_spec=["x"]) - wf.inputs.x = 5 - wf.add(fun_addtwo_with_threadcount(name="task1_1", a=wf.lzin.x, sgeThreads=4)) - wf.add(fun_addtwo_with_threadcount(name="task1_2", a=wf.lzin.x, sgeThreads=2)) - wf.add(fun_addvar(name="task2", a=wf.task1_1.lzout.out, b=2)) - wf.set_output([("out1", wf.task2.lzout.out), ("out2", wf.task1_2.lzout.out)]) - return wf + + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x): + task1_1 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=4)) + task1_2 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=2)) + task2 = workflow.add(fun_addvar(a=task1_1.out, b=2)) + return task2.out, task1_2.out + + return Workflow(x=5) @mark.task From 1daec25cd6fb02f6cb3dfb31772c979feaf6255b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 16:55:43 +1100 Subject: [PATCH 070/342] got tests to load --- pydra/design/shell.py | 25 ++- pydra/engine/tests/test_environments.py | 88 +++------ pydra/engine/tests/test_helpers.py | 2 - pydra/engine/tests/test_helpers_file.py | 2 +- pydra/engine/tests/test_nipype1_convert.py | 80 +++----- pydra/engine/tests/test_node_task.py | 2 +- pydra/engine/tests/test_numpy_examples.py | 1 - pydra/engine/tests/test_profiles.py | 1 - pydra/engine/tests/test_shelltask.py | 11 +- .../engine/tests/test_shelltask_inputspec.py | 61 ++---- pydra/engine/tests/test_singularity.py | 3 +- pydra/engine/tests/test_specs.py | 14 +- pydra/engine/tests/test_submitter.py | 2 +- pydra/engine/tests/test_task.py | 11 +- pydra/engine/tests/test_tasks_files.py | 3 +- pydra/engine/tests/test_workflow.py | 3 +- pydra/utils/tests/test_typing.py | 6 +- pydra/utils/tests/utils.py | 186 +++++------------- 18 files changed, 182 insertions(+), 319 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 2410a410ae..b654acd77b 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -328,7 +328,13 @@ def make( input_helps=input_helps, output_helps=output_helps, ) - class_name = re.sub(r"[^\w]", "_", executable) if not name else name + if name: + class_name = name + else: + class_name = ( + "_".join(executable) if isinstance(executable, list) else executable + ) + class_name = re.sub(r"[^\w]", "_", class_name) if class_name[0].isdigit(): class_name = f"_{class_name}" @@ -457,10 +463,19 @@ def parse_command_line_template( else: assert outputs is None outputs = {} - parts = template.split(maxsplit=1) - if len(parts) == 1: - return template, inputs, outputs - executable, args_str = parts + parts = template.split() + executable = [] + for i, part in enumerate(parts, start=1): + if part.startswith("<") or part.startswith("-"): + break + executable.append(part) + if not executable: + raise ValueError(f"Found no executable in command line template: {template}") + if len(executable) == 1: + executable = executable[0] + if i == len(parts): + return executable, inputs, outputs + args_str = " ".join(parts[i:]) tokens = re.split(r"\s+", args_str.strip()) arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index be5c7393d3..85366d6052 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -4,13 +4,10 
@@ from ..task import ShellTask from ..submitter import Submitter from ..specs import ( - ShellSpec, - SpecInfo, File, ) +from pydra.design import shell from .utils import no_win, need_docker, need_singularity - -import attr import pytest @@ -176,33 +173,22 @@ def test_singularity_1_subm(tmp_path, plugin): def create_shelly_inputfile(tempdir, filename, name, executable): """creating a task with a simple input_spec""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help_string": "files", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellSpec,), - ) + inputs = [ + shell.arg( + name="file", + type=File, + position=1, + help_string="files", + mandatory=True, + argstr="", + ) + ] kwargs = {} if filename is None else {"file": filename} - shelly = ShellTask( - name=name, - executable=executable, - cache_dir=makedir(tempdir, name), - input_spec=my_input_spec, - **kwargs, - ) + shelly = shell.define( + executable, + input=inputs, + )(**kwargs) return shelly @@ -363,35 +349,25 @@ def test_docker_fileinp_st(tmp_path): def create_shelly_outputfile(tempdir, filename, name, executable="cp"): """creating a task with an input_spec that contains a template""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help_string": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellSpec,), - ) + my_input_spec = [ + shell.arg( + name="file_orig", + type=File, + position=2, + help_string="new file", + argstr="", + ), + shell.arg( + name="file_copy", + type=str, + output_file_template="{file_orig}_copy", + help_string="output file", + argstr="", + ), + ] kwargs = {} if filename is None else {"file_orig": filename} - shelly = ShellTask( - name=name, - executable=executable, + shelly = shell.define(executable)( cache_dir=makedir(tempdir, name), input_spec=my_input_spec, **kwargs, diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 0eb5c4156b..69e7cc71ad 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -5,7 +5,6 @@ import platform import typing as ty import pytest -import attrs import cloudpickle as cp from unittest.mock import Mock from fileformats.generic import Directory, File @@ -20,7 +19,6 @@ parse_format_string, ) from pydra.utils.hash import hash_function -from ..core import Workflow def test_save(tmpdir): diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 7db3f8d34f..64bd34a7e5 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -5,7 +5,7 @@ from unittest.mock import Mock import pytest from fileformats.generic import File -from ..specs import SpecInfo, ShellSpec +from ..specs import ShellSpec from ..task import ShellTask from ..helpers_file import ( ensure_list, diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index 2f5abbfb76..17384b7644 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -1,76 +1,56 @@ import typing as ty import pytest +from pathlib import Path +from pydra.engine.specs import ShellOutputs, ShellSpec +from fileformats.generic import File +from pydra.design import shell -from 
..task import ShellTask -from ..specs import ShellOutputs, ShellSpec, SpecInfo, File +def find_txt(output_dir: Path) -> File: + files = list(output_dir.glob("*.txt")) + assert len(files) == 1 + return files[0] -interf_input_spec = SpecInfo( - name="Input", fields=[("test", ty.Any, {"help_string": "test"})], bases=(ShellSpec,) -) -interf_output_spec = SpecInfo( - name="Output", fields=[("test_out", File, "*.txt")], bases=(ShellOutputs,) -) +interf_inputs = [shell.arg(name="test", type=ty.Any, help_string="test")] +interf_outputs = [shell.out(name="test_out", type=File, callable=find_txt)] -class Interf_1(ShellTask): - """class with customized input/output specs""" - input_spec = interf_input_spec - output_spec = interf_output_spec +Interf_1 = shell.define(inputs=interf_inputs, outputs=interf_outputs) +Interf_2 = shell.define("testing command", inputs=interf_inputs, outputs=interf_outputs) -class Interf_2(ShellTask): - """class with customized input/output specs and executables""" +@shell.define +class Interf_3(ShellSpec["Interf_3.Outputs"]): + """class with customized input and executables""" - input_spec = interf_input_spec - output_spec = interf_output_spec - executable = "testing command" + executable = ["testing", "command"] + in_file: str = shell.arg(help_string="in_file", argstr="{in_file}") -class Interf_3(ShellTask): - """class with customized input and executables""" + @shell.outputs + class Outputs(ShellOutputs): + pass - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - str, - {"help_string": "in_file", "argstr": "'{in_file}'"}, - ) - ], - bases=(ShellSpec,), - ) - executable = "testing command" - -class TouchInterf(ShellTask): +@shell.define +class TouchInterf(ShellSpec["TouchInterf.Outputs"]): """class with customized input and executables""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "new_file", - str, - { - "help_string": "new_file", - "argstr": "", - "output_file_template": "{new_file}", - }, - ) - ], - bases=(ShellSpec,), + new_file: str = shell.outarg( + help_string="new_file", argstr="", path_template="{new_file}" ) executable = "touch" + @shell.outputs + class Outputs(ShellOutputs): + pass + def test_interface_specs_1(): """testing if class input/output spec are set properly""" - task = Interf_1(executable="ls") - assert task.input_spec == interf_input_spec - assert task.output_spec == interf_output_spec + task_spec = Interf_1(executable="ls") + assert task.Outputs == Interf_1.Outputs def test_interface_specs_2(): diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 728750594b..154afa5534 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -25,7 +25,7 @@ ) from ..core import Task -from ..specs import StateArray +from pydra.utils.typing import StateArray from ..submitter import Submitter diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index e866987126..e770011b9c 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -7,7 +7,6 @@ from ..submitter import Submitter -from ..core import Workflow from pydra.mark import task, annotate from .utils import identity from pydra.utils.hash import hash_function diff --git a/pydra/engine/tests/test_profiles.py b/pydra/engine/tests/test_profiles.py index d3070ddeab..0a0f5e17b5 100644 --- a/pydra/engine/tests/test_profiles.py +++ b/pydra/engine/tests/test_profiles.py @@ -1,4 +1,3 @@ -from ..core import Workflow from 
..helpers import load_task from pydra import mark diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index b8591092f4..9b70dbc6b9 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1,7 +1,7 @@ import attr import typing as ty -import os, sys -import subprocess as sp +import os +import sys import pytest from pathlib import Path import re @@ -9,14 +9,15 @@ from ..task import ShellTask from ..submitter import Submitter -from ..core import Workflow from ..specs import ( ShellOutputs, ShellSpec, - SpecInfo, +) +from fileformats.generic import ( File, Directory, - MultiInputFile, +) +from pydra.utils.typing import ( MultiOutputFile, MultiInputObj, ) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 7b95ea558f..b75c20a8a2 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -4,13 +4,12 @@ import pytest from ..task import ShellTask -from ..specs import ( +from pydra.engine.specs import ( ShellOutputs, ShellSpec, - SpecInfo, File, - MultiInputObj, ) +from pydra.design import shell def test_shell_cmd_execargs_1(): @@ -2167,45 +2166,27 @@ def test_shell_cmd_inputs_denoise_image( # tests with XOR in input metadata -class SimpleTaskXor(ShellTask): - input_fields = [ - ( - "input_1", - str, - { - "help_string": "help", - "mandatory": True, - "xor": ("input_1", "input_2", "input_3"), - }, - ), - ( - "input_2", - bool, - { - "help_string": "help", - "mandatory": True, - "argstr": "--i2", - "xor": ("input_1", "input_2", "input_3"), - }, - ), - ( - "input_3", - bool, - { - "help_string": "help", - "mandatory": True, - "xor": ("input_1", "input_2", "input_3"), - }, - ), - ] - task_input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) - task_output_fields = [] - task_output_spec = SpecInfo( - name="Output", fields=task_output_fields, bases=(ShellOutputs,) +@shell.define +class SimpleTaskXor(ShellSpec["SimpleTaskXor.Outputs"]): + + input_1: str = shell.arg( + help_string="help", + xor=("input_1", "input_2", "input_3"), + ) + input_2: bool = shell.arg( + help_string="help", + argstr="--i2", + xor=("input_1", "input_2", "input_3"), ) + input_3: bool = shell.arg( + help_string="help", + xor=("input_1", "input_2", "input_3"), + ) + + @shell.outputs + class Outputs(ShellOutputs): + pass - input_spec = task_input_spec - output_spec = task_output_spec executable = "cmd" diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 0af3792444..7eec9b01dc 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -5,8 +5,7 @@ from ..task import ShellTask from ..submitter import Submitter -from ..core import Workflow -from ..specs import ShellOutputs, SpecInfo, File, ShellSpec +from ..specs import ShellOutputs, File, ShellSpec from ..environments import Singularity diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index c06d9c6e17..1e054a4eb2 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -7,23 +7,21 @@ import time from ..specs import ( - BaseSpec, - SpecInfo, File, Runtime, Result, ShellSpec, - # ContainerSpec, - LazyIn, - LazyOut, - # LazyField, - StateArray, ) +from pydra.engine.workflow.lazy import ( + LazyInField, + LazyOutField, +) + +from pydra.utils.typing import StateArray # from ..helpers import make_klass from .utils import foo from 
pydra import mark -from pydra.engine import Workflow import pytest diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 9d1cd9f3bd..4848845007 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -17,7 +17,7 @@ gen_basic_wf_with_threadcount, gen_basic_wf_with_threadcount_concurrent, ) -from ..core import Workflow, Task +from ..core import Task from ..submitter import Submitter from ..workers import SerialWorker from pydra import mark diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 4a481e9d92..77d8529bd6 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -9,15 +9,14 @@ import glob as glob from pydra import mark from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages -from ..core import Workflow -from ..task import AuditFlag, ShellTask, argstr_formatting +from ..task import AuditFlag, ShellTask +from pydra.engine.specs import argstr_formatting from .utils import gen_basic_wf -from ..specs import ( +from pydra.utils.typing import ( MultiInputObj, MultiOutputObj, - SpecInfo, - FunctionSpec, - BaseSpec, +) +from ..specs import ( ShellSpec, File, ) diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index 8d22a415e1..697afae9c3 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -5,9 +5,8 @@ import typing as ty from ..submitter import Submitter -from ..core import Workflow from pydra import mark -from ..specs import File, Directory +from fileformats.generic import File, Directory @mark.task diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 791b6a0123..d106134eb6 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -34,9 +34,8 @@ DOT_FLAG, ) from ..submitter import Submitter -from ..core import Workflow from pydra import mark -from ..specs import SpecInfo, BaseSpec, ShellSpec +from ..specs import ShellSpec from pydra.utils import exc_info_matches diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index 37002f727e..56e84ba4d2 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -6,9 +6,9 @@ import tempfile import pytest from pydra import mark -from pydra.engine.specs import File, LazyOutField, MultiInputObj -from ..typing import TypeParser -from pydra.engine import Workflow +from pydra.engine.specs import File +from pydra.engine.workflow.lazy import LazyOutField +from ..typing import TypeParser, MultiInputObj from fileformats.application import Json, Yaml, Xml from .utils import ( generic_func_task, diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 9760704b38..169867fce6 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -3,6 +3,7 @@ from pydra import mark from pydra.engine.task import ShellTask from pydra.engine import specs +from pydra.design import shell, python class MyFormat(WithMagicNumber, File): @@ -24,54 +25,29 @@ class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, File): header_type = MyHeader -@mark.task +@python.define def generic_func_task(in_file: File) -> File: return in_file -generic_shell_input_fields = [ - ( - "in_file", - File, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - 
"output_file_template": "{in_file}", - }, - ), -] - -generic_shell_input_spec = specs.SpecInfo( - name="Input", fields=generic_shell_input_fields, bases=(specs.ShellSpec,) -) - -generic_shell_output_fields = [ - ( - "out", - File, - { - "help_string": "output file", - }, - ), -] -generic_shelloutput_spec = specs.SpecInfo( - name="Output", fields=generic_shell_output_fields, bases=(specs.ShellOutputs,) -) - - -class GenericShellTask(ShellTask): - input_spec = generic_shell_input_spec - output_spec = generic_shelloutput_spec +@shell.define +class GenericShellTask(specs.ShellSpec["GenericShellTask.Outputs"]): + """class with customized input and executables""" + + in_file: File = shell.arg( + help_string="the input file", + argstr="", + copy_mode="copy", + ) + + class Outputs(specs.ShellOutputs): + out: File = shell.outarg( + help_string="output file name", + argstr="", + position=-1, + path_template="{in_file}", + ) + executable = "echo" @@ -80,102 +56,46 @@ def specific_func_task(in_file: MyFormatX) -> MyFormatX: return in_file -specific_shell_input_fields = [ - ( - "in_file", - MyFormatX, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - "sep": " ", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - "output_file_template": "{in_file}", # Pass through un-altered - }, - ), -] - -specific_shell_input_spec = specs.SpecInfo( - name="Input", fields=specific_shell_input_fields, bases=(specs.ShellSpec,) -) - -specific_shell_output_fields = [ - ( - "out", - MyFormatX, - { - "help_string": "output file", - }, - ), -] -specific_shelloutput_spec = specs.SpecInfo( - name="Output", fields=specific_shell_output_fields, bases=(specs.ShellOutputs,) -) - - -class SpecificShellTask(ShellTask): - input_spec = specific_shell_input_spec - output_spec = specific_shelloutput_spec +@shell.define +class SpecificShellTask(specs.ShellSpec["SpecificShellTask.Outputs"]): executable = "echo" + in_file: MyFormatX = shell.arg( + help_string="the input file", + argstr="", + copy_mode="copy", + sep=" ", + ) -@mark.task + class Outputs(specs.ShellOutputs): + out: MyFormatX = shell.outarg( + help_string="output file name", + argstr="", + position=-1, + path_template="{in_file}", # Pass through un-altered + ) + + +@python.define def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: return in_file -other_specific_shell_input_fields = [ - ( - "in_file", - MyOtherFormatX, - { - "help_string": "the input file", - "argstr": "", - "copyfile": "copy", - "sep": " ", - }, - ), - ( - "out", - str, - { - "help_string": "output file name", - "argstr": "", - "position": -1, - "output_file_template": "{in_file}", # Pass through un-altered - }, - ), -] - -other_specific_shell_input_spec = specs.SpecInfo( - name="Input", fields=other_specific_shell_input_fields, bases=(specs.ShellSpec,) -) - -other_specific_shell_output_fields = [ - ( - "out", - MyOtherFormatX, - { - "help_string": "output file", - }, - ), -] -other_specific_shelloutput_spec = specs.SpecInfo( - name="Output", - fields=other_specific_shell_output_fields, - bases=(specs.ShellOutputs,), -) +class OtherSpecificShellTask(ShellTask): + in_file: MyOtherFormatX = shell.arg( + help_string="the input file", + argstr="", + copy_mode="copy", + sep=" ", + ) + + class Outputs(specs.ShellOutputs): + out: MyOtherFormatX = shell.outarg( + help_string="output file name", + argstr="", + position=-1, + path_template="{in_file}", # Pass through un-altered + ) -class 
OtherSpecificShellTask(ShellTask): - input_spec = other_specific_shell_input_spec - output_spec = other_specific_shelloutput_spec executable = "echo" From 0dc7ec60e56ab76b7e06a6f8b22fec689ff5b36a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 20:16:04 +1100 Subject: [PATCH 071/342] renamed docs to old-docs --- {docs => old-docs}/.gitignore | 0 {docs => old-docs}/Makefile | 0 {docs => old-docs}/api.rst | 0 {docs => old-docs}/changes.rst | 0 {docs => old-docs}/combiner.rst | 0 {docs => old-docs}/components.rst | 0 {docs => old-docs}/conf.py | 4 ++-- {docs => old-docs}/images/nd_spl_1.png | Bin {docs => old-docs}/images/nd_spl_3.png | Bin {docs => old-docs}/images/nd_spl_3_comb1.png | Bin {docs => old-docs}/images/nd_spl_3_comb3.png | Bin {docs => old-docs}/images/nd_spl_4.png | Bin {docs => old-docs}/index.rst | 0 {docs => old-docs}/input_spec.rst | 0 {docs => old-docs}/logo/pydra_logo.jpg | Bin {docs => old-docs}/logo/pydra_logo.png | Bin {docs => old-docs}/logo/pydra_logo.svg | 0 {docs => old-docs}/output_spec.rst | 0 {docs => old-docs}/requirements.txt | 0 {docs => old-docs}/sphinxext/github_link.py | 0 {docs => old-docs}/state.rst | 0 {docs => old-docs}/user_guide.rst | 0 22 files changed, 2 insertions(+), 2 deletions(-) rename {docs => old-docs}/.gitignore (100%) rename {docs => old-docs}/Makefile (100%) rename {docs => old-docs}/api.rst (100%) rename {docs => old-docs}/changes.rst (100%) rename {docs => old-docs}/combiner.rst (100%) rename {docs => old-docs}/components.rst (100%) rename {docs => old-docs}/conf.py (97%) rename {docs => old-docs}/images/nd_spl_1.png (100%) rename {docs => old-docs}/images/nd_spl_3.png (100%) rename {docs => old-docs}/images/nd_spl_3_comb1.png (100%) rename {docs => old-docs}/images/nd_spl_3_comb3.png (100%) rename {docs => old-docs}/images/nd_spl_4.png (100%) rename {docs => old-docs}/index.rst (100%) rename {docs => old-docs}/input_spec.rst (100%) rename {docs => old-docs}/logo/pydra_logo.jpg (100%) rename {docs => old-docs}/logo/pydra_logo.png (100%) rename {docs => old-docs}/logo/pydra_logo.svg (100%) rename {docs => old-docs}/output_spec.rst (100%) rename {docs => old-docs}/requirements.txt (100%) rename {docs => old-docs}/sphinxext/github_link.py (100%) rename {docs => old-docs}/state.rst (100%) rename {docs => old-docs}/user_guide.rst (100%) diff --git a/docs/.gitignore b/old-docs/.gitignore similarity index 100% rename from docs/.gitignore rename to old-docs/.gitignore diff --git a/docs/Makefile b/old-docs/Makefile similarity index 100% rename from docs/Makefile rename to old-docs/Makefile diff --git a/docs/api.rst b/old-docs/api.rst similarity index 100% rename from docs/api.rst rename to old-docs/api.rst diff --git a/docs/changes.rst b/old-docs/changes.rst similarity index 100% rename from docs/changes.rst rename to old-docs/changes.rst diff --git a/docs/combiner.rst b/old-docs/combiner.rst similarity index 100% rename from docs/combiner.rst rename to old-docs/combiner.rst diff --git a/docs/components.rst b/old-docs/components.rst similarity index 100% rename from docs/components.rst rename to old-docs/components.rst diff --git a/docs/conf.py b/old-docs/conf.py similarity index 97% rename from docs/conf.py rename to old-docs/conf.py index fd0b69ca43..6ab4140a4f 100644 --- a/docs/conf.py +++ b/old-docs/conf.py @@ -16,8 +16,8 @@ sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) sys.path.insert(1, str(Path(__file__).parent / "sphinxext")) -from pydra import __version__ -from github_link import make_linkcode_resolve 
+from pydra import __version__ # noqa: E402 +from github_link import make_linkcode_resolve # noqa: E402 # -- Project information ----------------------------------------------------- diff --git a/docs/images/nd_spl_1.png b/old-docs/images/nd_spl_1.png similarity index 100% rename from docs/images/nd_spl_1.png rename to old-docs/images/nd_spl_1.png diff --git a/docs/images/nd_spl_3.png b/old-docs/images/nd_spl_3.png similarity index 100% rename from docs/images/nd_spl_3.png rename to old-docs/images/nd_spl_3.png diff --git a/docs/images/nd_spl_3_comb1.png b/old-docs/images/nd_spl_3_comb1.png similarity index 100% rename from docs/images/nd_spl_3_comb1.png rename to old-docs/images/nd_spl_3_comb1.png diff --git a/docs/images/nd_spl_3_comb3.png b/old-docs/images/nd_spl_3_comb3.png similarity index 100% rename from docs/images/nd_spl_3_comb3.png rename to old-docs/images/nd_spl_3_comb3.png diff --git a/docs/images/nd_spl_4.png b/old-docs/images/nd_spl_4.png similarity index 100% rename from docs/images/nd_spl_4.png rename to old-docs/images/nd_spl_4.png diff --git a/docs/index.rst b/old-docs/index.rst similarity index 100% rename from docs/index.rst rename to old-docs/index.rst diff --git a/docs/input_spec.rst b/old-docs/input_spec.rst similarity index 100% rename from docs/input_spec.rst rename to old-docs/input_spec.rst diff --git a/docs/logo/pydra_logo.jpg b/old-docs/logo/pydra_logo.jpg similarity index 100% rename from docs/logo/pydra_logo.jpg rename to old-docs/logo/pydra_logo.jpg diff --git a/docs/logo/pydra_logo.png b/old-docs/logo/pydra_logo.png similarity index 100% rename from docs/logo/pydra_logo.png rename to old-docs/logo/pydra_logo.png diff --git a/docs/logo/pydra_logo.svg b/old-docs/logo/pydra_logo.svg similarity index 100% rename from docs/logo/pydra_logo.svg rename to old-docs/logo/pydra_logo.svg diff --git a/docs/output_spec.rst b/old-docs/output_spec.rst similarity index 100% rename from docs/output_spec.rst rename to old-docs/output_spec.rst diff --git a/docs/requirements.txt b/old-docs/requirements.txt similarity index 100% rename from docs/requirements.txt rename to old-docs/requirements.txt diff --git a/docs/sphinxext/github_link.py b/old-docs/sphinxext/github_link.py similarity index 100% rename from docs/sphinxext/github_link.py rename to old-docs/sphinxext/github_link.py diff --git a/docs/state.rst b/old-docs/state.rst similarity index 100% rename from docs/state.rst rename to old-docs/state.rst diff --git a/docs/user_guide.rst b/old-docs/user_guide.rst similarity index 100% rename from docs/user_guide.rst rename to old-docs/user_guide.rst From e2541777f502f8c6e9e29120975dea0f03357bf6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 21:26:50 +1100 Subject: [PATCH 072/342] added new docs structure --- docs/Makefile | 192 +++++++++ docs/make.bat | 263 ++++++++++++ docs/source/_static/images/nd_spl_1.png | Bin 0 -> 30961 bytes docs/source/_static/images/nd_spl_3.png | Bin 0 -> 26547 bytes docs/source/_static/images/nd_spl_3_comb1.png | Bin 0 -> 27292 bytes docs/source/_static/images/nd_spl_3_comb3.png | Bin 0 -> 28176 bytes docs/source/_static/images/nd_spl_4.png | Bin 0 -> 16849 bytes docs/source/_static/logo/pydra_logo.jpg | Bin 0 -> 14825 bytes docs/source/_static/logo/pydra_logo.png | Bin 0 -> 3047 bytes docs/source/_static/logo/pydra_logo.svg | 150 +++++++ docs/source/conf.py | 396 ++++++++++++++++++ docs/source/explanation/hashing-caching.rst | 176 ++++++++ docs/source/explanation/lazy-evaluation.rst | 4 + docs/source/explanation/provenance.rst | 4 + 
docs/source/howto/create-task-package.rst | 4 + docs/source/howto/install.rst | 4 + docs/source/howto/port-from-nipype.rst | 4 + docs/source/index.rst | 116 +++++ docs/source/reference/api.rst | 27 ++ docs/source/tutorial/execution.ipynb | 23 + docs/source/tutorial/shell.ipynb | 23 + docs/source/tutorial/task.ipynb | 298 +++++++++++++ docs/source/tutorial/workflow.ipynb | 23 + pydra/engine/__init__.py | 8 +- pyproject.toml | 4 +- 25 files changed, 1714 insertions(+), 5 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/_static/images/nd_spl_1.png create mode 100644 docs/source/_static/images/nd_spl_3.png create mode 100644 docs/source/_static/images/nd_spl_3_comb1.png create mode 100644 docs/source/_static/images/nd_spl_3_comb3.png create mode 100644 docs/source/_static/images/nd_spl_4.png create mode 100644 docs/source/_static/logo/pydra_logo.jpg create mode 100644 docs/source/_static/logo/pydra_logo.png create mode 100644 docs/source/_static/logo/pydra_logo.svg create mode 100644 docs/source/conf.py create mode 100644 docs/source/explanation/hashing-caching.rst create mode 100644 docs/source/explanation/lazy-evaluation.rst create mode 100644 docs/source/explanation/provenance.rst create mode 100644 docs/source/howto/create-task-package.rst create mode 100644 docs/source/howto/install.rst create mode 100644 docs/source/howto/port-from-nipype.rst create mode 100644 docs/source/index.rst create mode 100644 docs/source/reference/api.rst create mode 100644 docs/source/tutorial/execution.ipynb create mode 100644 docs/source/tutorial/shell.ipynb create mode 100644 docs/source/tutorial/task.ipynb create mode 100644 docs/source/tutorial/workflow.ipynb diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000..e6d46dcbcc --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,192 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pype9.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pype9.qhc" + +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. 
You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Pype9" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pype9" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000000..523fa3eb58 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,263 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source +set I18NSPHINXOPTS=%SPHINXOPTS% source +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. coverage to run coverage check of the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 2> nul +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. 
+ goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Pype9.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Pype9.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. + echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. 
The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/docs/source/_static/images/nd_spl_1.png b/docs/source/_static/images/nd_spl_1.png new file mode 100644 index 0000000000000000000000000000000000000000..e4967901dcde2843a01b36245e22123ec53acffb GIT binary patch literal 30961 zcmdqIg;SPa)HX^9NC=34l7fWNO1Ff9bV-+hbVzr%2#9nGNC^l?cL^vU-Q8W%%~|~3 z^Uj$w^Zf-MGs?rW@7;T^b**b%cZj^KI5q|`1_A;C_8SQ?MFa#SdH9Edjs)MRrv)9r zue%>4)a?)uF!68yAtEFuKSV%yjPOQGSlKywYuZIk`TYgj9tkBuKMD$|;sY@mg?1)( z;a3I^D%0E^nh+8%e^Jv=Pozqe3O%ne&yKxI{a%vt+oxw3536yqVsrgSo@9D=p>I4| zIqC3wQgXL6g^g{t_ef`#1FJP*vhOT)m8bWLlqBr6D2ycb$GzD*@H-v}2?qh8;Q#Lb zeaT?1Y7b&0vy|z3Qw22D)Fi}0LP932+uGXRoC5V2;ZY) zV#?i4cC>39O@q`eya`B2yA9fZFYkZ)^r^eM`}EW~K%_bKL1-Hu9-cu*_^W+HQ`L5>3vHq0HdK)kEv>B=C%Y_V+|U;+-`%@+74xD6{Fab&F_3V+KIpK4(wn6y zxw1u`5V8I1i(QED`}ZFI&kl>ecNlpthO>&q0!nJ2>0BSz;(_|mmJ zS!o^d@uR-($F_wmsW?!bms9*1tH(QeR~r{`|JTmq(qtf*?w^` zYXbukD>Dq|=TE!%=g*&0Qc}iNGQOf9?O;;>>0h`MEkT{cWnIytz@mmDZR-1efm@a@ z=n*lo>&^8Q)c&T;%ZC++yBP2{bh=Z{7x!5nw8H6A51 zVmbcv{oY)&yQ(2-8Cx<8nUWGwRG^su;k?hQd2TDHrK6(*NBzZ%{f*&#iCSV}e&-L+ zI-EFZ+JpW5k6~fUwNwbmt>`$6L5g`;n3z$LS!&4{5iluA(LmftF{<%WW1M4Z z#N_c=hllpDj{W`p&z?P#;LH43S}G+gYea^rU!s<$yswSt&5Y};=Q%JqST3c!(wo-n z#G}&O)5FZk8OvmV7usf9oxJ-I!3q?`)#NSd7 z{-z0~z1LAwd$>c2i4Tm0|3k0QyFXR`~djb-r_ zoUrMVb~tQ~$k0a3>yx8nBjKQ+5cx17UkK=rKF1}7dmsVEoxY66BNNb@%_CubGx zp&A$OY&ilyn2YqWDw|AcT1;&0HvafK&L`7O>c%=27K~6F-4acU?(2A;k&kb(x!Gse zwh19xGAk*G1aG?0i=t5p7YUXLmx_ui*rY3#>C~|7;g26b%ts2+=^C{VX5RUuT@L=} zmVWbQzLt=cdi$fy)H;$sf)5)1B6m+~t7y8s-uvuV`ccDURQLQ5loFdtX{2I{Iqkls zyxKoJ#KOjo`D6I+-#?h{$h{~0bNHI0kMD>4(5e48=p~hx!5Fhf@y%W5rQvDw3UMC&qDr=DaU*hD?835N2AONbGyEyy#)lQp+q zWmlwWZEI_5Z10{Q{~X?H=O!Gi-2!Y>mnObXd)^ceXVgis_Pw2;864KSQdy|n2$jh# zW*bt1W0|!(ohbDbCxmAzEGJKY+$n(idS(R-nDtI0cN;x!o6|Gf+9HQP|1>vK^73k= z%Wus#rtNApZGgSWUfJ0R&*vWbWLf0LP^ISz*5wV@WrhbI!6SZ48MbwBh>@Uvks!?+ zcc6`_Mub8hKUCwe8T^!*+J9HXTMm;JDAng)v#Lb3*0-^tEo>i zIwB|*6`A9)U=+N0d3j>&PCHr+n9nl=JfKaV7stlO zUx|z3D;ODN&wodDMg`Mie-17!Mw02`b&+em z3SNa0nveb%J_sWhbgwe{o2ct`bu^GAg-ya~0qu*4{pw2>J>ppjz$5}gX1CIv@a4-F z@d)zj+S;I?AaEmE7ryXA!es>m8J(SNEG{m#S#0NRR8>)lRbveQ^hvASEJlz6dB_*F ziF#+YQBF+1Bz{-qQ@-75kK6Iqvy-Ks6u$bpI$p;uu%9MW#B4^NQg>}vds2RA*D~wW zonD?Dn3eM~Fqn@OzY}A(Uy~1CwC7v-x7Y!`)obZD-Z4#wjRh{J!^V(p5hEibbw*2z zNc}YyahL=()$Lktx{!Jk!;OeXPwM>?96VQxOa6(NV*@N1n4?f47S7*@fG?OLC17p` z3!y9{c}h6|6<}m5$Zw(R1=Zz}nX=NtJ$L(1hE+Lcz*01pLV{hNL7fLSV3%IN{W?{>JBep8FC zWnp2_MkkjhC?f_ZssCUuf42*#TZnJXNU%_}*R#uId`&|tl7dr0$+P;4M8XO$V1Se6q@p`Pp zpwzh6b48e)SSE?na?89M?jJL=Y;R}Sc7LWggGxbhNlD1Zj{~dRfDeh`&|T~{Hoj{O zjMH&79r*U<^)!AE_EUU({IC;31P!>D&$cGYLxT?~++c)xqCxlO=jXS&UWXA+*SXjM zSYT#mhAm}aX8!SqLj-mOo+x>n$??v>VDRUAsp(M81~~0-)QpXdJKEciH-@FsM9a;F zCHXo|r=4c&_d6M23(rRiHN%F;$sazH5CiDA+i>yYc{q+dArj6f*(}9BhA%?IGH!y5 z0H4n~FJf=UJ$m#=j5<;xIM?Ii7-o<=Zv%!s+nA;$z278TI>GjIZ{80Lrva7-m4Bba zx-Rnb=e?_w*-FzvqnSDvRQ{i_etv!u)L$gC+PS%3@_;+aG#HKTJZ9M}kcwlTcvJyn z7P>yogyxseYYRK!Fu^rQi-~c#Jhkx{kj?BT<^J$>!8gv^8-eO$NJxLq?~RhMw~myOs6-jFpVBcfZ0=0g;?qj^2`b6Ue*nV)n=Wu*ygO5`sjRHr%Js9P#Ky)Z zPf7fPagfvQ3>k-6bVNjN<*a8{MfQ*h`*e*1pUb|XC62@1oN$QP!u-6=a%arX-qH$x z*Q1R`-&a2R`uV9A>7+zP`ZzG?S@>RXVa3u>Qu+k*I_(tPq!Y5}Oo4^IymY7HwO_-< zJ3BiwZRE3_sl%s}2$m`Zrt#Ki^~Z>pmqJx2!K5G z3B3OK)}WM>l&<>yW@)v+>EvBAxm~kIhNzMlv=daFij!GZMxz zR!rHq6Q$Z1Et<^~kD&Kxwr;$7EfyXgK5Sj(urpO1%cMCmIcff6e0aFrYPv=Y+}|f# zmh02Sh!B(CpDBk)(jonTAU-Z00adL}F(CoCy$>SRgO) 
[... base85-encoded binary patch data omitted ...]
literal 0
HcmV?d00001

diff --git a/docs/source/_static/images/nd_spl_3.png b/docs/source/_static/images/nd_spl_3.png
new file mode 100644
index 0000000000000000000000000000000000000000..e4e95b4e72d63c3216808c8684c42519705372b5
GIT binary patch
literal 26547
[... base85-encoded binary patch data omitted ...]
literal 0
HcmV?d00001

diff --git a/docs/source/_static/images/nd_spl_3_comb1.png b/docs/source/_static/images/nd_spl_3_comb1.png
new file mode 100644
index 0000000000000000000000000000000000000000..dd19db24f92d4e640a1c38d88dcc2ee0cd942a64
GIT binary patch
literal 27292
[... base85-encoded binary patch data omitted ...]
literal 0
HcmV?d00001

diff --git a/docs/source/_static/images/nd_spl_3_comb3.png b/docs/source/_static/images/nd_spl_3_comb3.png
new file mode 100644
index 0000000000000000000000000000000000000000..b50fad23f5650880533920136234254029a61509
GIT binary patch
literal 28176
[... base85-encoded binary patch data omitted ...]
z;+~zc;d4j=z)c}U#EsxS{juzK2nJ-s)N9>yDD~@XV(tO#k-3DUWA!AIvq9xR#@sh5m&9Grz2;N8$cQFSC-ZpqHZh zr*Rckg@Iv*WTxOmxq$%W(e!4w%hQPh1SbQOu1Z!SGY&@Q&BINFbogGXR#j!CiGyX< zL(AfvvY6)5_Th)kb2a*;Dv`bC%kJ0_8b~IuP{e|LDb?)Jat$xrK~X03+n~h6#P?lc zuFGM+%gcw4|D^SYNV~al6T%!uNM)D~)?(?qO7NqK_)Wp-3FLvU&|hY@x&jz@Cs#x~s2e!Boo~|Cr zJ!HjySedO-7bG%V70d9RZ)k7>aI9jP{_)+4u7P#$`9RCjirbUGu*VD(OYqmTt1_<{ z#z7*%U%Lj3%WW;EH-ZmY9K7@gE22Tf)ij?zEo^ZKBBD}Gmc1PtV-8J=KXdl!eq~z` zl!&7)zf2y}XI7EQ^VL1WZ|ERDP`uUK{`?39EKoi>u3nPFs#n~>p1S{}{t=nI2Djr9 zo(#B);FPyXvXog2rEd4G=c&~D7W=ifvw0;IEq1qj5RJC46A{FDL*ZAHcZYrp&;KH7 zjpH0gmiYC<7KuNQGN!{(3DiR zdc7qu;2Uh7Baa7dk})nB+KHZ1+e*uK+cthiwUkM5^x28AUh zO?FrPH#A0bD}P$N&G@o=SoLbzZ)|bSsH;T-Q5T+|0o_8B>1@cpDQzvSDHz$4iG%K!xYmq4-I~hJBxXpZ@UYEyC;RnHr}WC0pHOG-CR`)u)Qe z`qk0hN1Vlq-wrlPQt#3Hc2Gy#ytZpIMb~u(_i?M3$=p1wolWJXln{AGyq`|;noAe$ zDit~8CqX<8&mcW*UWjP*@inpOH{wca**K%9$K;Xt6Xe{m%Fr%WhtYs z7<9RXs_cKrtd`2jf^rmSHu!MB`A_U%U!!36+||_p&}5+Gd-Xi>aMN7}^3tHS(E`Sd z#H6t3UFwOGRh+SrVV&!Cyq{vEhx7GyKF3gU8O5c=58DSFKF*C*4mREf%j=aA=VrCD z^TiYnza6AQ`nww{?H}))7P$GQlS7a~AejnncdXBSKWy|+&Y+aK9fei~J;B^_ltDZ8jrWrZ2&Ai;WkQ53 zEE+#Xm@e0-g2L&FousNN1w_xEX*I6eFWYgeWHkx{NX#|>V2C~Iym9t1%G*{2fzA-+hF4Jb;qOA zSC31gA~WiG+>Z5hpZ=MPQBLMf)wq~|lwv2<<-_LG?J<_KRzjrab(t`<=5SU*9c=6M z!;it$6?%LXX=5`Sr7)EldnYrkLm%(_YVP-ui3iusw(}WoQ&%n(*7Gh-4QM9In{$Uf zgk2gh6cbqkdrRpx$T4VmHeZb}`rr}a;XJA-mjq@<8?HdLRJbmW?oBxEg-PkeH6qb= zmRV|N2tLwxrnj&?64m_1el4+AP+b@~%UUo3p*!5$+a>1Rq-!ORvZLUnn%JIa_JOYM zd3ljVj{cS>VkW-_v+B3u@BNRO4xjb|kus8t=l4mZcFG5WXKxZ+%zug1%OrN_&kk4G zx7le-`c@Y$9@tV{rMY20z0*jTjE=!rkOCn*-sgq~PZ8~L@CphGtCfj(+D24Dl$@*- zwiD*%+1dI0%N%yB%UENPhYf%MiI`AUNUS#(8{0 z+2_bBgWNfUX<`o+(A1e7gJ#ZK<8Lk!H0&AGZUI|~q4ds4^{7ADE)wdS?ABvGC+r$_ zk|beYWUT~gO_PlD&+2QRHUy{LPLC=HF_PTe(OX97Puey=$(UO`A37VALQ6rL_~rz> zQtt%O?#@oP7szihN!zIz#z#lNn+Oodq4jiLaW?H9`g{!<4e@8aNT}5=c=?&~l{OE@ z&EI&hw>1#y$~lB72LGy`wD`w#orQ$PKb_az7K_)V!y)qKa-4kF!g|A#X8Gj{CMK4h z>q)s!twK7h_ky>s^9gy-U}~7smgiC$BQ=>sxUBog7}CER&SRZ;Tc=dMYs-0c1Bt2l zdft0$vqAa~eLe&@6uDm@B*$9eZO3^wOYDcHPt$2sM{+8p^zyoDt(ixRis|f z#>UdVailT2Qk`JpHxJk2;ceUGP{c{yKH0{!lw5t!G_N@Z%Gw6FZi0 z_W&b3eHpI;S6YjAec(Z7}DlI;rqJ5IKv%NbklpXteMw%ZW7GK%Smnfa0U zsQBeQx9FGxJ4{c;Uu;y;mWup3{8#cOWp@DP3|eC{@PBiDHI;7E6fH;z_v4k?1CfH2 z=U`M7yxIqiLq5yD#p3(ZM;f>6p~DMJp3?VUcs}VrlHLgCeOl>mUO!o#j3}CShoL6) zKRfN~dR*jx_R(wcOiP=(8C+>J4OG*kzvpJ%Vz7^nKXSb|ew)hogG?}j*+Ru@g^mSM zSW*PaN?gcnPdZ_(cH`yIzx)4`-IrOL4fL_K+K&#bj~ZLT(;&Tq03={Y;%AfR8kg;H zbV>@hr$5QU#ffN~w27L-WCtrPb4R9v&s@0n!Q@aw{1sfdv}cBhg3&c=3ct<2Au8V_ zx0REO&ICQ}+!BRY-dNCK^E#Bfm=&Bx<1DAUu;x(7)riH_e<8oESBp)#>3(~#BWb!y z!-Z*Jpg)4Z#OWVjgj^-d?^ar++xe%@iW{4ouW?i3U6%j+*(b1X@p>1i*7zZ#$(2L3 z#@^E3#df*CTb>of>vzum7gEp5h=W770neBMABjXIBoZ#!@gTCY0Y0~#V>M|P)e008 zKiuIwYtT2at4c2(n`SI5+88>&s;d1AbJbKI7)Yy;3bsW-@gIMa1fL`E8PofD_-~X_ z@Uh%if2XsWjlcWqpo7|3*=sI*+^3I@&-)SSna1<)1siB@S?d|Z+Ao^6U2faj=u@{> z;(7d@<~-xFJN-|)M2ZaqB}Vm!?5->g!Q0c+tji8we3U7 zwTOj5IN90f*y6j22`$Z!nrmo3`c9}H3Qjh+JPA&o6-taDxNLo^s<0AGA+4bvQ_3aB z=amvrbrpoOP0ajn@9W0EOO4ptUp<-G zIqwsNw>SCaY6&nlt`9{fo<#}ZtUY`Fos7aBlw|dIP#oSw0 z5x;tMi}p7INDx3VE^M_lEG#VrQ5&~wi7TK42Ab;C+1Y1Tv&fp9W<0U*wi=xpt`AQ> z_AZ{bJO~ik!U^xrjyWYXVfN1a9_aUm2zB-QHG5J)^y42PtL3A*+S=6O@86g`PmaaM zdraHDTgjz!Bg?2_IS&m5joahqtPuT&073Yx0~6~ztsiU^^?jF}pWEenIks{N!Z)(Z zH8uaJ?A~d?OvpQ&2_~A{D6Dy)-wt|>vJV@f7YqBr@gx6QXxh{-k@(o#!ZJz7)@uWr z)CZs5+60cgn{7K(%rPpz(e)%G_G1lV%Ud7gr{X?#yUMwMe`u{J`7S~Qq23hO1y!w` z-8~EgD8$ey*b-&kUWupY9CsNY{xSoSbjnT1iKujD@e0g!Q|u@C#ZS{V^^r=hXD4_M z!F?dN%;|M>+)9Y2p?Owwi&T$fq@(ZhL0VaV7GXP@dY;RHKz@ke@z1kxQdLdKZyzs< zSLoJNX}QQjc8YVOlkc)BNite|C@`Sqds(HW2}3)Ow6)*2<&TNiT9Wt8xrK{sP>do%~%(8 
zJ>JmJ(fgfSDc3kT%^q`}8M5Q<_lBQihJQb%P{C6D_b>-q!qt_su(5HdY5rmHPqcdf zW$fdvUM~!*MusTHsoE~uY%%sWC%q=LPPO~NW%K^cwLM1Bds&$y3VwE{Kr4ZukfB@CX@PK{u-|qu4^}g=^~` zg(ep5^ji>e&W@}o*p>!d8uhv%N!VP~jpfkK`dsZj_c}LPZ-k-)9N11bDJD&+y^rltDMTNm zvK5VAxotgXjp98$n@8kTHzPxyR-XpZyzqW+NA3`$R)PLl@MdEyCYHmwIStb$EG&yAQ)*6oxr$F7evJ;4RTIQWKJ#?Z=x#PydRDO7P&oO#4D0&}VSFI>HadyUZZSjex&iuAKII`~d7IO^|w zR^uKU_vq}N5Xa(S@!z)B6f41MZ|7E~pY8KM6%q1Cf_}_@R;)^Q>rJL30;Lr`Po=16 zUe!hk9ur-jHm)t<-r;Y3!%7#QM7OyrSV2=R9zF`)hdv|0Pa2W7R)))2FcR1Jl@t`M z_l}zu|6=PjOo!6TD#}_W-Fb$YDNOPnpH`BtkItD|biK*~*cn2+=x7)1_7DP^8jXf; zq-wW}xFW3gB+ubdY^Vzh%kPwQhc(#(&Q)X*HT*ZG0KbH!~sIv$m2lKt+4`xV67aH>48_HgAXqS2x*Kaf77 z&f>ONl`l|9RrM7lPPHxVc3``L&*kxe@BRll#x9lG97VsQ#F57$BzTp$O5z$_Q-zV2pY6Xl6Fi6{{M%G;&%)zc5mqF{L_%Ld?v< zkz4%9NUw#Nh#3=-E`UmG&Ya!mEcj5MdB#TVd3(0v>_zcQW=xh+S*GOLPr{z-d=#Y0 zswzx>jcGn}yYS(bqKMK6(xD(FB`Gk3v>@F%a~AJ=uJ^jmw=*An zu9=6q=Z?MCUTf|3|E-hokfNoXm5h#dGw~Kf-FnniP}rXp@~#6*@BXRUwCCQukM)=2 zOqiXzQ?TJCB#WON<4tQNPYk$KrO7;=!gk`)a^pGYQi+Zm1xFtH6B9$hwDb&bn9@w* z{>;$e@hW``+n}g=wZ%VgVQbzvi5IsM_c5+gOiJ2LF&Mi!@qv=0K6tOYqm#`=HVz%o z+tp$GB}hp)`O?U1f|-x|-3(&Wu|^`UOAlrGJhBu+CoSF|@6SAc9NxnaNWMR8fF100 zF1FM(GQ5j2dvn|1g^^o9mN-2Fot5&so0ro0N6!%FQ_|kk!MTEJyuUA^!~5;?#MVqgq4#$NKFF+vm z7X(t#C+Lq}Bw4-WqWiOJQHtA$b>B?Ole%Khku22}?tuYq@qXBYN8W;v(CIBhY;akm zY|TP=%jtm_XV#%vrK{p;q7(VPU~-FEli8by2%Gom=i(h}IbvugXJ(oX%YocWr8-DQU+i~YILwN*ht6sT=TP$K>-lYTr0HiSfabw6~qT_xu! z3C=e4foJ0F=~?AWE@<1*3nvK z0g{sfvw-;RRQ8Go#g{jsdKweF?;RNU36z2DF!mv(I5E*?=cJ6=&U%lZAK(1j zFWQujm5Kb`JJ`j8ebU&+koU1{Am88Jr}N`rsa^^lK=0R}lO2bj#C(PlN0gxtld!e* zeMfnO^zoJ?w|#zR6H}9GL3i4-XIpr|7DeXO+{-GmQb9jyf3lNaEbRmSfwjNo&Pb_G zt}zs%B&DV{de3j=HNKK@MK}|na$~P%T`6=As?etg$K`w^`pal0sl8q@dr+*MEFNsm z3V&sH4r8Fl!#f=xT}XSmI(?E7kNA>xj9Pbq8bK za?FbF=CSpnBl;?tzQtT=j1rZ#)Cl~fRkgQ?IcBUBAHz@;db;+E^jcLO%kGJR6iwRv zh^ckp7?x+iM5h*8nEBhYQ=SeH=lltd*?u=+ z)0;)rSTWMRNwfA{6t^Y7p!sub8~GE)>zU#vgAtxuy1M`13e1cu`{NT2be=tMuHBLzG1y|QClkq_Y+G1@6=T>r0- z=V2|*EQ^rrxN|%QR6Izpny}NeU21S@NUZD)ZG2*DI#>AJMOtV|0o_NUyoif&P33{& zkt5$)<}e%C?vAICUv*^o`ES*cn(2w5x|B>!jVfxsp3=L7T$%9^mKHqS95}yuI`0(o z^l5pT4X=n`kln1^D>us*uhnVxC9=*cHI`PC8``Wdm%bVLy8OIti2cx^p&_i0Ek*|I zASNME3neI-?|C*C>po}QS&%cGtY_v}b7DgP{4wx6C4;iIg`1{4QMK%BpZ_5W^*R3^ zg`FVYOmC2hoXmn>Iez-#T>&Y6v7ol-i5aQI`Z=#%AKS6+v_fBDC+Ey zBfq#nVT*5Y8asZzKOA}lugRsxOD*zvXBgQoRFmiA=BAOcUlQ@LH0&V{YFa||I*$=N zgw1*%pY82btQ4btmsXS-6^rkFSBxU{O|yH?UIyJLnjBo#PtE2@25!*HAdjF)wh<$( z!BR>8mi?Kf&Nk;5>X$wyKeESeMh=~v{*KUDxY6Sy;(6QNT0%miv$t2-%k?;Cd+kSc zN;YBhxPJjzI1@)QlqY<;DuS08 z0^L+>cEWt>*0Zf4bn26qs1F}=be2(BFJ9N#MWR{PYqJw_f69ne(`7HsT`N(~3=+C4u} za6x>&_{?xl6HQb6AkTYtrs%8M zURj#q3clLZRN&^S&Kmm*)OyFLJyr+#IF#*Vt@bGt;@H?)?VlHp*jGG3GTts5mTbJU$CoT~VW6)ifp zPaR7(KCIxtjLf$_(tP>pJTUwQ`oTk zn7{T&>X^%AA^y#YzN=_5vfacJAMNLcLY@T8m!VXodV3)Qz2b6TDVlTWBXjm;LBTzX z+Jiqsu@7qlVy1d-1>Pf(1aMG`z7DgIE7hL6iZS`#M#pJ=n}0l;#6s8T0JZsz<|k+M z!s~!28FoGN8CUO=o+SO0o!vBW8ZuM+!c3`>>>$l$$P!==qLN60D z9hE>_z8f9gJd}#$>alHcx68}QZd5dS)WR|jw1I`L3^S<3$Lyv=lr?8K-9Uegz$^PQ z^R!n=ulCx5R|_U`a;tJM^BW?-q4NLJj>*ufhz}?yjwhzlocaQzU$B88o|j4UhjMI8 z{O0z)TO~{(KH8tK{-mTlr?FGMURF+t5&hs+0f?keRdZfLHZv>(Ht#3f8W$xNak0*+ zErWxS3PO^yvL$@jvZWF>-h?%sr-}2*Si`$7cz?7NoeuPU_g8n5;^L7j%>XVMqy;?= z5BqN4_RhIR*h9g>>eQoOLYCWLzt1jOeG;65*h3ZwyxqW(iGF2CraW9sPoFG~T84goRp31=^@~b-{vEJo zHr#(T%O}EP0+tf5sUGbLO7x%{eLh1!@o8Mh#vdDR@7vzDXS4_UNJ)V_WzavwJibx# z;YTHp#^VcC%Han|_BJ-3-{(s>Y<;k4jze!R@=yM{SH8`b=~SnnMJbkfPoe+rR7rSZ zrg@IFql1R+7cwQLO(NggX6S%5U(ft~`mESWMrqpZvy|9Wi zvi)fVJ>l<1qyi#2;s&Zj8lS9b?Zwo`acp~psAX|%{$VEH_7jSDP5JQYlbBiFkH>+B zN8yK`bC26-TFI|{hwW(5ynk%tp}z9PQ7Rk-X0>az>lm$T>`~rd1wqPp$H|Eoc?-Je 
z!a_YL3GtNG)zxKSU_c&%4(6w{2H^gjd3eIjyK4gF?7IH;HDraYNk&U%+EXGWm!;CK zV;Jdctxgw}Ljv!bO*Ab!jMpi(wk{Owrgmc=MC7J29LdrWq;y2lKurb?qRovw!82Y! zkBt1?7%`&*wXE(v9+Od%3#sA-ZUu_D!67@Pr&{*G$U|1=g6`MKcaE-&W?=#1rZIHv zZ|T10m-<;GEG+C?gXy1~lP;sr{!}iie6&Cql9ZMfx9X2DCPS+H6&Vm~JJ|kQB0&(s zsKmC!%F0Tu`X|H|xQ<~421B*40;ppck)*VgbI5DSpfB%d)e(g4#=(!(lw>ZgJlXAJ ze=1^pwVb~P%@)M?xbK`fRyofTN9m}JX>gn;e)*_8R)yod0D&_@gbK%9QedG(sC=UR zV1bVtag$%W_Us$Clm2^z|DP}NxImGrZ$N;cLV-Fbzo;lcxvm8TRd?lOW@Z9$>G<%F zzwUOo79Z{K&=5bDcxtC2FfD%VOXJX=X zYrgaQ_wUc#5*O}RS^_Tcx=3nl^E54EMdJ?CFaUe-3P37DE;a!m2LNSkN+AFh`1EZ| z>Oyx%2>EVe=84C@calw?s(qFt7)bfq-t^!qOdoow>g9k6D>Z+?RAUAK{?hiK5 z)QpCTYH@)fPh32_{q?Eh4}X|t+|%U)igum@BecM{6gREAw-*jn*JTJo`qI);9hc{= z)C>$V;g}E}g7u6^08EUI9)6D`@~o(>MYp%}0qX1;`7aeo-tEXok1AY89~B-gYT~?xn;&evaq3g&%pu6jdzEU-RxVa8{E{OcFi%u`TYCuwQU^XXX@FYie{nE z;yBMprU|5yPq*HybKX)@Q@ejZKQ;AtwnnUV2b8|*rAa4u%z-Kj_z2*>)>c(TJ$~Gs zsdy|Je%M(OU}zNxkU+ROg@wW=$+(oFfHegmqMn|ft!?(ZcNdXAo3m|D%?<2L$Ug&u z0N7`N3Yh(=@EbR7B*w&C5VMW?f;PEzDy#84k4t}(j0v$waf85o5Pmh-)HhvrGMS3rP~e?6%dk+`zFycy?iLix|1 zI#6BXciZOVt7oD4aiOM-$hxUcik zKGz0Oo`Q4#2Y@14)cWhC%O-2S2f;iZqMt5n4NY*VcX-8%sm_SSx{LN#cy3 z2W&W^qN6tU@D&N=qYt%bFS8?u%7ujm1QAJ#JOtZs z$$qwnEY&Rv81mc|JU$E(PRa zZmtus2UR%0#V;!>0~kV9=-vf$17qXAp!;@nBX?p~d|+>Fefb-|%*Bgm=21pw`!EE^ z3SjF|l8>FF>$5={#?3tkj7=~c2!e+f={)e!qW*#3(auu7Q87T$G(tn`)6$rQ%#Dq! z0CMYUCh+nKP=_#_$H&LP!NHwEuJ`VhR8&M?AVV1Y&o#H$iXm@vbBWA2X-P{PuRI(_ z-N?D^)b+YA%LfM`At8a$gggW%dDp~R7G(tzI#7wHrat+qC3sVB;T--WK=kbG`2(2- zwkZqZo{NWXFLVP<4)}cmSzbp&?m(LWnLo(VN_l**uqL*}w#+0Yoo7n~@jU3%gsbvO zN<4=POpH4yFJ-|%h1ciLpS8IkU@?jcMy~;Nib9c>ymZG~SJlM|L3RpIo7fS3?Na>MHu!T zq-b+1SYnt>BfCy^c6_g1J)@AL1YAAzWlGg%f)pzAeD%SorKP{PI>8IQ^GFJGE^ zxIw26My*8$m`=e-kxnG|kKmKYNIaJK?~Y|ALiDMfs~&Y}6kx+SL&^RvFTcLNkZKi1 z;soiYFRr8FvKGe1vH&*&i!2M-(TmC2N~b#m7Y)F40L2Q1TUKxC`>zmXz0y|-oJp5- ze$MhvlDt~NN{D{oUOA3|u8Hj($no9!R2fLnK5)&kUkc6 zP(YNZt{!D+f!zwM0}iKM8E{E}R16OkK_XN(fFsqbjAbc;>cOquT>`|sAtVTp6kx=E z|7KvzfdvxSRzKBy04$xDxL$U<)VOPL8?cKs)YRtY=7dvY9u0>dD=O&I<~?O$S%W9V z9)cgQvDW2OYRs8Tyr&HF7+k4_6n%@o;}A1o#Mx)2r>U5k9 z;HrW~SzTRy{SV9q_Ho4#>^H|U)nW`$V8rmE15;KnLu?DINh7~C&5Wx&ZLVI$ONX5! 
z0DpA;{P~F22@lfgc6Nc^OoR;G-Q9uR3DnIaybAgtq@k-17?^w8+xOkwqbHsMhXU@>3pv%jNgcr?d6>f6Fc(Ph)xf~0b1to# z%gfh(x5pqms5APdyPI1fobBh&)7yux(!mG6ee>)OP+359bRU2F9au5BsACe@6t9FHm4l^9Nf|&Mup(V`^~XUl zMg#-}`DsZL+svvwFUF{kReG?S??QeNPUwofe5^WWsc~JQmaqwO$lVuR!C3Rr zf^2ST`eB!#OnbHWTi%M6UnZ-z9N3>k;>9c+ixecGWx~;G_ZjdG13qzVQ;^J!t7cci zA90=i5e1w5r0S#eW7Q-#v7KABjO~q)IH>(d_!{!X&11*(>=V4mE_tNAI`omNJfdBT zrJS3>s4T3~%$Z+EawEd)cc0?G?h&Gl2>0G z*G5EV?@KvWUC!4`Td`g)Hk60&Z>x?Csh3YQnz~sE`@!+)Bg*sp?iGiB9jr55gx`0= zcQ%s$rHOUP2>RL%=VwaZU-aso1!jnI+vPEwzaQQQ3pWk3lYF{`Ay2rzWk&6Q-|VJ9 zqRc$&5m0NIxNvOL+?`+Aqfs$rkmT=2q_#cL!@Y!XsULMN0q|ZavOM4F-lOm#cS|X_czT>9|6TgsBcu2Q zgIG8hs(v`0{PI?@X;;22oD1g>zB9unZ{B|Kv+jKHvkwRfof$613Y_MPZ|8l3t9hyl z@!VgAp#vj=1%te^LtKT(Zqw$)s#VS#qib-^8tvr$BDTbQy^T8J#Z7S1v5ZO(<^|o1 z4ySXsBse>ZcPbF@HLI0k4BiIO&A>g25?2}1;3L#HG`hIJ`tj_jI&jo?*3s)zHgAmF z&raknUcZQ%30Z4nOG^uIZ150n9v)z!UGosk3YzyUDjKT*h6LDp_#(X{r#nZ>Ux=oihAI$gS${rSU6Q5TnQ8}>X;}u&SY&^YQr#xi(o&% z1*UW)I!1p01rio>3N{~5=u*U5b z=j2qIAU{OvPtpDTw(jmC)G=>Wv$H|hU!bF_D}UvRb zNXh65l)PQy<%I=aDXZ~bUpBZ6;4>}hGe8?mpD_b6nzuxSRJpvFU?e{KU4j*RO$~_$>=o`m`2XzkmSU5AJf`?psnkX({TtfPi0z&K#)gt)%GD(fPT#r!BTEYfQ=7 z!0;5JKi30nFH;i}F-Ee(Z1|r^9rmv5T5*iPG2etB?b{MOEO6AD4B#I?QVOblLIykD zs@gj^7#J7;cpO&zknT59z{diovC4BAW+41Jct$10;AF+ecNZuCDrJ*8tdAzZ=`8H>iYG_5t-)RT*Qzwj#RvJe+X#kI|D z2G*4)gA(Dvg9p}?;y-{Z242s*cdBD8%>bK7Yn!#1D2Awsg~BN*qop3eBT4QO$hW4Z z8JLR5L-19A+-*x|U}%Vo%oP#8!%gy+dR$U+Ia%9xuAK@-Irs?B|@*^7`6;QJv-tjXSiwN_imk9r14A zRJ0b6lM4cvCAz{bMU*zR6N*y`cBmm1E$Hs2%Ydl>Q5$4swhW?f;%L`0sRA4iEEj+x zr~Od1wa1VFgK5}neHlC!zlE*~a6Ny=!4(E148UQk-f43S2n)x@Lh`B^+#2wRPESq< z`DWMG*H>2+Jz1Q)$)78=DgW6#NJyYi%9FAGO-;ujnKhl))TGA1kSb=^`fc>f7jXqPEaZds5)Pqj1SA)mGL_ollE7xl z_$}Y%3jl^C+)hWwVVHrxe*IZliEEy=gr+mx)H&hqkb?yaoSK??10|l*n>H)UqEC@e z!RCye@q~!&_sGc8=IJ;HSK{M^g@vC;P7e3?QzHlw)zv?e=0#+I3X`vd%>ZI1oz9K{ za&o^LAyQsh*;#%F!vYqZ&dyF|DVG=HbQkX;H8~%{Eb9U{7rN8HUfJpbDvyke3>qaM z2pk#~D8Rc%=98W~m!k^-DEjwrqlKknKny|mld{njh|?j9(8K{qM&KrXDlWFFa77sl zi2$sRpMP_6Q!X5%@K_B}n^Rye!7qi7W*hC%$g22&HrmC*qZmvUAK%}#H8=xE0#$xV zY(>85Tk7g|!Wt8Lve`Cb@#P|cm#=S>%y0>hxybLq!MhJuN^q(E7m6xKHQ`WtUxs!c z7goKi$FHSavMGb91MYAgG+6O1`tLQ487M2TQ6gu5=6X#>M@L;9zb>la)2GWaGHubL z40Lp>Q;m&aFk!E1YAQ=7r-=o!Dm#$lv8q<;z7zXUGSpYk6gv;5* z#S|d(w6xp<7`V~U^aIEL{C|SMcIX|WN2iUL|2C3E%2OoeP`VNZzCKx_kd)T+c9NBZpKV1-s&^p;JBh^Ppf0E|Ey znc#!P?isN1@81#NaId34NM~kje6lle<9`vQga53Z%5uJ_)m(>eX60`w)8FJcwmzOCaTp}7qFm1RbI2WxIj^b#3nf@=^N?A2r%fOxAQO94$QK7Neu zT`U`QT^T#+LP=mT`;Gjx&thV(W%XG*i6m=B={okLJ9~IQ-zUoOf95s5OZmkOkLu}d zOJ%vae{^>KgAOtq524Jc$nS=PyM~6qR%J`v z-*3cbsB?Z7V-8UYhq(;vXSXA?R>*9g4h#+k|9}bkH8C;q>sL}r3fKDy9BGsCdsK;S zkV|ZXZb!yKhR`Bor56WLfZ+ozeqO$O30)%O{Z}@&woEHrFR-+LIfA?rB(eG5Phg~B z>OylU$QHnWGoFUDm2BVOAn@SffhV&1Ys1xovlXuieK=qWmqPqn;f8Lv7mrYB@9v%e z1s)7TAt50M7@?gAB6BwPAhD^do03o#j>Xu7GZE*!TU(SmPkM=ya|YTpdDeXX9HaJb zZf;J#3Z-uG+%eb9z;!G#M8lPA=Y zl8{`ajyWq3;Rhchj#-AnauWvuH=-os9&Bc8FhKHkHSU50Bsumb0;X^fpv+OQ$g1QH z)YjBsc8kM5-_KRj1_=?V%d~ejX~>Y7h6cefD)N`$FXb@xPbqK@;H&<9*&59xw{P9r z1e$Nlp6Gr@aU-V2zJpqWycc#l-jZU1Z-Iw_LR~m&OJpKoQi%p;LxlC~+5SwW&^+8l zxYef6eC1~~JTmb9?`VI*0mJz!s2%qm$=iI3s4LZ8vzj2sL^yQSnhB5D2=2xUf6~0_O();UYf+R~Da&;=q3>HsWgb5C|I9 zzkhI$q!e5TgcKqnETHI;a=7TKIdn^cbaL&GftFcr?)W7)B70ov2Qp3LjN>txSePUI zrzyHPsUst$_jNr~T1i269KnkC^CG?$!>mQetZn?g47&+=pV}0Tw+%?z#LiQXuUU>C zh6cCqZ@t>mG5!12;RFTA!Rsh0DsE^Y%k$jCBTn`hGS6B#+1c4SIb99LGmL}J-@ot& z)|Qr*W@fu9D`SNU)JRB3=;-J~L_~>7A=t#6HK9oK7N0J-CYRZa=8g17R`M1EJG0Djv2W7+zGbol@ z-$bD!`mSzon}CnG`EUISeSLjGA|fLXM+p&;p5b9SDyqEtdR)lC!GVXTC*F$yTIlC9 zf7|iSKf4b6{QU0j_owT9+uPen@bLYf5D_skldb^t+uPftqa!CLCoeBAT&`d6y;31y 
z3p_nN#lnesy&i6wn|Z*!@{6?DSy^k_+wtk?{9X?(?(Xh7Iy&shbjMi{4&ePi!wU!t z3-9dhadB})F`AmM2TE&er-4c3IXSs`@YSE;WDb+jv=*zy#&6#s zuG2mgFV^$?b(s^vTSII%f4^zKL<}jUe55a6Cu}l9F=b7Pjh*Zc!ZOg;S8u;o5EI*; z`lTTh@Pdh*J#T#Xj2=ni#g!HI($;XYkdTn|a;v405#_Nu#ZS>}S6A27Af*t2zi2q( z;>jG=(qF!O`TA9%L{9JBCoZnM+*~yc4ey5=Td;Ism>@KFj^JR@A4~?lvhQOw>Z~HK zxax$%{@ol+Jk2w*%X`C~iIQFlsk4yR%OU<`EA#Z=VKKP__va_>8zD*ZlOMiUSDE>hP zh7uA2#;;*Hby8SR(B0dsuAzZR$SG@^K53e2b0EsZi2FiFsB3L)4ae0x6!`>G_|@u6 z=rdAe{k85O4y%RsGG+AV&wWo$Y~H?o+bo;=x!gG$f&&ghrmPq2(b@H_AQ#w_7WeC3 z0YZ`Qwn9!1D0pXQXX>{RQohJOW7E@ZHmg3_+1brd0jaL~cWms*mKGKsuqV$Ht0E=` zF3*VqIbf5xeH(!W967a4hjR#MI1zNvFv3q~^VV%av*LPst7UCZzjdk=2$hwUM@B|~ zv0zmZQ9Pe%ddGn1t7mO3(>Y$SIT&v}U;E2)z82E1UjgpUDlO&ne)50|Z1%jrdbmA* z^X3hMPAe-Ln^u)EdU)SQI=aOwlhHUjjqVbp`)|R|+&L&jUUvJTU`eNN3e=XsK`1FH zy{2q9Q_iC*wD9P7V7#SOj3Jc@n;pOJ#xw*M@FmZBzK3`$b2advNFF!L_J8~aV@V-u9?SiU%^(*qE zlAW%Pw7@V01qD}!^V7@Q8w1hk(&L{T_Y}d2J%twBj`<>C-re0{fDM0O-$zDA*DBYA z>qQ3`6d`A5xbfo^IR)Em+x@9uPmlM-g@thK`_pCjSBDzj>1tKR5*tzK<2i%oS| zSw7xeR0054d5FVBvsXoI|8~~DBl4Q2qM{O&N3T?+<>lpt^bZY9Oih6;0#L`s#8lWa z3UD)>-&-4|%D}*&p)tSZMFyLFhUzcxz7D27IXS6Ps_6>m!=8M&w)S_yI2juspUeB{ zp}LwisO~4lv-JQ6d3kvUM@LG|(c2gr1?3XeS9o~0Pmk`No{!)R(Cf4|kMQw7-DD%v zox3zY-d``bdTOYtsYytf?f%Vu_>stAoz~QJe=u7Gz2fn_yI5$j2j-)pr8T^21FW?B zHWYt-XGc6!w%k`pn@5-#SeW7mr6ncgO8p+x4sJzdBYsxjsur8 z>pKi!;wC27U{74%`s{Tsp$FyU~1YqAh=TK5oqDWa36{wiCkdl&m*q8bM4yr$Joj(aby{;1VVP$14`Sr{8 zCP0zVO8mQYJ|-Ts@RCwBEAZQRcz9=}&piEYg-~Z_XC00GKCck~_meM^zM+M1hq@ks zt=;MYf&$Xx59yzbguvPvtQI$vgj7{kjg7a!;km&o3JI|AE=P9fI0JqTgdB9nftV!S zWz>9pULeBIYH~vOR#a33;2xaCvMY?X$E*2)Xlf8ZFlg43))!`FWs!K@WL}L&?e6Y^ z1N6@nRLZo6^($@&fE)Gk^$jQdRB690{e)v}eLR}ZACidu@X?d=8r^jzRz<$Ny!>Sq z0s%qt(P9&Lyc*eMHFP?RAAKkHYkV*WLV|}&P$9tU0>DaQHVOta z5w=1DL6A|^>`er(gzmBE6Dlg|-VhnRPj`1WK&;2hX&pE?xYsEtGxPKACo3H_b((En zi~apSVD?}Q;Y%8!=H`2chg$AwbcntkhRnB6L4a+ieYOS$2GEf?bM`D86tJk*z~3&l zxQEP&3kr6gZ4NFjF2a$rvR02I%{m`1wRqgxJ!V^2Smep1FO^{GGbgfz{53N(d-L{f zh-(QaI%;-FiDV@@StPJ+pBs+FG1KZ_8g!W{f z+S=OM)&>JBBa_P2qhHb7+zcK7_l@)iKwxSLiiE_(Ny6m#cxQkJU}wq2!hYhjng5QV zf!I9UoC5cjn3VK)_X7CaXWsx~goppLhnni@4<9}xx!X}E6c!f3YY6Fg-dk-jkdZxu zB+U;-q_IJ#EEXG;)YKZ(#QvC8kEC*k@FP2Cf_Ne%Bot>m zP644C_;^59mPOz55q>0nbFdik%s%nc~SQrs}t7~SaCM)~jLI3&l1#pM}%vyoVK|n$ZOrm}s zkptj(ZfZ*T{~!MjV4R9|OUT7Vs7r0}9#zB5oB2!w0|UJ7S25xW;$&bx9i5$Ie0 zmWz!DX=!7{Z+G@O{ZL3-{Gt4wK6osjhqzCbOa1sfGe#`=%CtL<7WLWCAP`$y+ZMOW z*KI@;H2>a8SNC)N_^d^(>aTKDM#m53sx~W-6mBHw)MpQ#oBOW?z1F8LCtaB3c9cIp zd9NHTo-8`+n%)feu67A1y^&6Rov-&q#Bn3cSqSjezpTt!Uq4G#N-_yPER4|z^dNTBxv}wMig8IsQV>8q8c5KH!>R9B_*&dLlGq|5;qvmzp>I8& zX#ecBzQIFrazeKl{q5tAwpzEWyRR%R{_UemlGTS#cqJvI?~!Y*1J$ZlCX36gclUiO znR#5v1iwF^2r)tX5z~>WBSBD@!tc#TO8U8bz0UGECZ=k`Yvo@){7>JAxHBrua+lfy z&BnhCzL`*ur?d#6;Qg)eamD%~+c^R_KKN?b#+5)Zm@`%Dcb)!^4MDzmnnc8*P0}blab= zr1CHC@M+SAlfpmtZL+(S)F*yWPdd zGcoZ%mhxqEoKG${5fIq^i1Pv`x!CCVk)Gb_=)3sbiD9ttix)2(930HcKGA@@LU)Sx z1{NFxfmm6+BtZN9+h$8LTa1S0du3$?48A08&`>b-Jf`i*#~bEPqtM!%^XHo&oclf1 z2N`jQrDcKI-{T%M({aR!iPocx?B^u>?BIzsG&I1$yP%}_0S|7p)a;5*k33nBLU?lW zt-*d@`-GkO84JtJ5%)mcwaERI>^4s3gkpK+9hY!w1mx;!tW*o@kjEX7on1$#w^wkv zbu4dqnvM`-XOF$EvA@5ch}((v`aQ*SPA4IH7L$>Qi3!anr(fk?r}FXyMG#~Q9h+V^ zG|0-qL8sY0u%l6;shRqGMf=ut8_x8C7E;Bq2&tC8HSmwznKLo_&wCeEZTi=`RIk?Am%N@+BzT5*3|1YAM z&gn-vZhA-bUmB7gA5U$8v^`8PQ;ni;u=?oE$d35&V*(43=+6lKAPI?bZx{z}WYGuJ zN_e-6Z@<6ba$#3iy<>aR=;S|~EFgj~W3e-h+2~lDY^zCMqK)3zcydaPOVE|_X}S4+ z*?TM>v)Ps06nj?#USMZ;cd^ZTxxM{!^YTjxgkE&KNKeO{J@cJK0%BetSlJ<646nOx zPq6WU_YLF5R_91+C&{Z=c_WAww^Khi^--kJK z8f;ly7==ZB#vZXj40?Hayx0_|Mew)tqk^HKkr7B%?a#NQT;Kk!{{|74^z(+jae^IK zjEzG@?>jnHju||rAfNvytzd-qzW^y!&+!VY0le5%aL8)B4k<;>5ich?mj~|FFqRWG 
z6+MR4qWM$|hXwom%nYuWr8vfVKr`6P;o)Hpiy6{mL3|Z$jNII@U&(&S!y|}T(Z)m- zh6AsO!<|>!;ZRh3C_qHme6t_7v9-0pcJKG-D7CfBOE^f_VniY+0lB!`Xu{PE zyn{dz81*_q%m(lTb3I_<;o$)UCSrR`2CgALe(cQE6feAG6YFdT!Lk7(Za_dlPfyRy zke=sFV76+xvyc-aBI4@mDm^{D--=j@I*AYI!Qr9xLj8BO?DrrY#s%*TBIVVgiDBfU>p`KENff-JWlUlkmqmzbn84_cGqBV=I1`tVpzH zuI6FIA08i%md;-{i_!0>m!kkb4-6E>hX4G#>i~uBgRJP|$B&>Il7ti({3a+nn;hgK zwY4~&9f$}Zs0S4ssa$y`DE6z~P#;?d2hBRGR8YyP|M>8M^hsXxExl&@xCDZ~P*1Q4 z2rj?}QMf?mnP_w0{Am6gH*ZtAnf*in%HOT`Sod)u_sS$k081Uu_3Lt>$^rm@((Hp8? zkj~}-5>D`nX=(p{$x2V=(@S_@0Z||~1ySNO9zk!noGf_VjEa)gs>;gW0|OFYzTk5^ zS%frv9x-GFg$58mpP!!-5T63fGBg_Xc0Bjpn<>ZE(tQPA3)Ie28 zNYWP;G$ke|T@^n#ZuCcj<77p^!QleHy=WLA9xg7Y+l46}EwaBSq20ewP|aA% zlFioU>A@A%pBdCVR8&;Z(9l33;*a3>VPlrWp`Sm0?(FRJ_lpyU_l2{}rnl;?MZD|{ zeHi!%-2=r$CMKroG93{9KZ3bI+WY$Y78Vw|yWt@q@UE_|-XoNkl|9)SCV+@bNYK#J z^En?8Lh9@5Rn^tMl=V^YND@|d)JL*J8A^GAY=798nwt9gXHdolC16O#)pZ<m{%SqhX27F){efw3-13%)0y3NzGO_<>QC$JX8*N&1uEmEKe}aRO(Qk1aXs zP|_55QxK3Rp;zTOc6Pra>6JPM;~OV%>UY+gAr9gD1(9R?B+%2FBIT1L1lb=lLKWg< zKoB|p&*VJ4sf;^DTj6iEdav6F56FR??-364=BIL1~W=BOfGW*r+1CB*VIlAb|$e7Kq|`QE_p@ z?Cd8hs$NJ&RtYKu#5L9B$w@K;^u*D`CI?ri~3g87yY!f%2>(!&m!3fGwB% z(=u_(Y>B<-z8&Jy(q1YLPug~Ng8I*@Ziu|^Z&1=EC!5XbWki%{?kE3sM{ef>F`a9x ztIMnESXdb&I?~aM(W8)eDKS9zLR_5laeB9)|9#a`OF)<%z9Glw{9@^A;zd8}jq$$M z82LVN5gr_2Nl6bjHz#_;#Kf(wUjNGXziRmk`_Q8Y{?$n*i>I@*Mv@cNu4*c(4h%p! zy|grQ_L18Y4#|#lGyMzSMyr=n0&MHk<0^k0y@t9vD4)BzNwafs#JdE(tU{EC=*9{Z z&Gr}b3nWG*;vMN3w5}{{2m&y8Gi--KzkZqQ$L-9J-ZWKHe3Uc^rIt#fXz`xTeFH)O z?sxBO0m^{lygl)zl?i_q4C2nl<`_w^@fx0-V}Bsq-5D#}UZ+j7T=#qawjpqYL5d-2 zz7*1AK|(LP8_u^uX>>}UsmrIm;SuRNTKqTb`|Zj7V!3JZE_ye2nZ*i&e9-Rc<`+D( z?b2nuI+!JReb%SAJWDPnRedn7qxP3KGAatQ;*Uz17K)0Jl6IAGEI=d9RH7k#3d%JK zu)?h#n`f0q-ye}KQ%PXP#tZZ;#_vbnXN!y>bH(Eb$1^F>H??3 z4X*7Sv*7OmH`}d=4zchkU*GGyAJ|;L1dD{frktFdv>SajB4n+xMLA0KUc)%xV*NY2 z*&P%e#nloexxkn>1pH)APagxu0;k;se2}vV{KpSNXuS1@1M3@}_dbu0kN*5%)dxOT zy${ z6UvGSb<8%R^X$JtD@`ham=qWjp5E&~(jTDE5*HN~#gBFG#T`qlUpW+`?lCphT#{*L2Hb3+pQ`dUH#b9x zdB1$-1#^xASpl-odm5TQ6B8DUwh5A3enD8;&0qN4*mLS~6AwOOe=cs>nh7dBoR|+I zIOH=O%g}Cd3tEWTfwNs{7ckk5giakVHuEYXcCiZD5pjRo+iOu#bA7y<1uKPk-BBV? zNxedY^dZ)KwogC9pzS%ePpFM2LHV&Rr&$#uM6*x2^}%^BoW&Rs!Ln%fhnnw3 zu`~%a80kUNWhNF2HIfaIQIgf{Ix0HOCmQETOTg{^GQ+in06%nVmFc!FweQzGy7>|H z079TbtNH9uhwt6d{9Dj+VT8WK1D>>8wFHmFxSTKkh`UllO+;LiypuMBNrrlkATm0|FgTsIe&fb?*M63_`TdK2r3)TK0UI-JBS21+qYo22ld7)n=W zNaFzFxth>P>w9*!tBRLTC0`1b&{L)BYHKn0NlYxv>9zgsx{lXz@x7dn4%9&8s$K(P z6b&X0>wT zljU6X$wz7p`6)>I0RqdMSpn#{)eP_^?<{h1Hzg#XC-7P)ZkR_`fs=2bbq8XtFYC8% ze)s@ToOxR{s$BiM7~Nf`$y^CBOoEGHn_uHoQ&Qj$s0y6zfk8TbVltT0mzbuKnVU=X z#q>k3t--n-5%&{jv`m8i_#>oY2o@(PX%1M9VsiRNuNi9wZ^2R+hz%u3~$KJ4yHdX(nH|K$Pz!!(<|17osZ)tKu( zLuF6qHC<`v4-+;sbBu8Nel}iMH8OZ%0`t7ng+f0btILM;=rZ!~@PwuFmlPF!I94~z zR4_TL^S9RF@&4p-b8>gg&githzFxdM{(I0Qjd{`&Y?+`|WTva)cLw6Uk+Ux$aV%4~p%z+?L%H{V2vvec;_44{9mQr`#q(*$H+C;-w6XNi? 
zkEX}N&_8`-7?;c(Z+>-P1xjD;l(g9z$R36+cT35R<#jq+`0vcwp!o2kH}%yE_*w)8 zV{y!M2>dNKQ;_WlY@S6k?r(aTDV)}zYi>2?t*i6&6(f*EuU-Z7@mN}mlSsLTLFD1@l!sXUuvw!v|DDD<}(d3k)3&Q5}YSvmN^$PhscqO*&{_s1Xn zE;m-IltQL9VH)k;g?V{X`DG%;y&s+MG^!D~o&K`3?j}Y>fmYpgy@Z4W6DupU@cI&k z-EN{A)Po0`P$%p_^XpI=gh=uU4GV7gJLg5N3+mT{)t1w7ed~aS5wwsuimA{^-s(zt zw?8o9;~ygFJ|AQc)nSWw-zpQzP5;8>ylXx;m{suJy8N)*YSI12LIz_uf8sL;y?~?x zK}6)K=8%!Y%QZAr?2P%U=o{beM7*-6R)?x8X%kAy+*R_xv2>%K$*iJeZ`jR(i$NnQ zR+L&N|9wz>uj?b+IztNqdoq#RPWaPvVQ;UW3M1OJF8FdvHScYFmoL(Z`7*5Lo11U6 zGW*egM^y#~2Tq3r&OtOt%>9vX$qfraGEB1lBeBIa-qzdpj*go=uzIHSC4BNiduqU; zi#$t;xk^a8wtgx>;15m#aGsaP?wDkvzc#HG%s@^mlg>wE!L68rCZG9^`^#7U;Ut27 z@4|eOvD(z62BT#}=hBiA)#mnlmO%1)DM$3s=YeEBIv*5qyTpF-;xsy*u8A<;&`wk` z;r`Tak?LtCReGqsnSPV(HD6a~A3p*aYxueiIyLCH3vCsT&ej;xGVbr{r6`SZYm&X0 zm{@R{3FJf$Ud(YjWffnuGQ9dwHauc>GOWR|xX?=5da;>Ih-GiG0B;>h;2E9l$aFT^ z{zk)GdDQjlLICzuTU2C;ztAR?TpJj!^Xhgzr=xw0_1(KLQsmhj0`ran*1d8$p>7Kb ziklyejY*FY9S`xxOE_=a=!i}EGb$cctRiL*HVu$u5)$w%XKIyGO3~B(m5vVyt{!Yp zFrPs0Tmh6Ks*gV2d3at&tv6^M*=$#^X%O@0%)M~H&X>hgP?p29;J)Yyp?dQCTp&R$ zoq}sPZAyNL>PK6us^X2)`iSEi06>R>oZctXJ<2IKo3+vyr5h&5+D2+a#$uVJwtX^~5#`QM6^Fe@{Jl-14f zHp03a+hQw&?LIYWYGr>H2o^0d_&3`MA0X|9%zZyEU*`7!4v>fS$>puv+~Pdghaoo&ei?T~xl`Bn zjK;+j@NEELn7N&@6RN+U+-uJ_=UaRH?N4X^vV>uPbrP+DSPVwmOR(mx^e?F6$=Q&d<6ZqZ#>a zN|7(8jc2FHSU!%UKap^NHp(5ZRJWOJzwC63JIX`)t{LX~n$^xeK~X^gg@OS=lUQOF zMNljL=QnanB8n1Olsgk5*MbwSkY+xAVHIU%W0v~Wj})EzS@XGdxwG?~AlIh+k&Psy ztn@0N7mkhT%6MugItoJ^X#PR9ogLfdNGAaXjo;l?)jk-j=a zB%=HEjr=Is3~gNCwJ_SpbHzE|1C_G;(bG3=r*{|#B(8~Rw`cR?Ons{uYjq{1nQ0Lb z?Xbko_0v4b%hQuP3ETLR=!|wq`i~O@&PVjui8)p$P>F+~z%w!F7zim7Q|6PY)qYNH zuGW8Tu83#uDi6|REy-~?bq#pF9T2Yt#*pRRJ*Rkkk&Uqmg>4S&rT!+=oc5yPhBNx_ z4w6HyFVMXkj>vOg;rJWO_K7Z0lJ;DkiD0Q@YrOL**np8X4GsloH5Lw@Bcnt_re)^+ z>y<@(y@ZvG87xdF4foUw=*wleQX5i{lRt;V*kn@rMOeIF<)w0*;4uo0Fy)kzavVcd zQ&okeNlxaM{IvBI&d)AB-(B}UEA`Qgd9P!KxV}Yj>-xeq{u}muMCRH^(d1b{frsns zwZTM1IlZ(989$It`%WPl8uOKvmWDM;#wvfWvH3mQ+f*h3%B4~HEX*nu13taRj($J1I`7!wA zE7(C51?ygxg4x8`2NFd{JM;?+)l}Ad8p}B+)63vlW?Tb40w)UUdjJXB^M6z<4g)4# zo%i#M5CihgaY~+j1L*NBy#<4$t%Q8(F*rAu7NS5 zm5~0`)4bAZO%JDZO71Sgt<4UG`N7_;te$acxAsqrk3WAJh-q*=t~S9;QgRc$ z!y;g!V#oQbm_EDjdo`?6U7GnUZK=U?61w-|Y4B)ycXkdP?NhzfkW91pG5ttbST`V) zs3ho6JUk(|BILqi=UyC9w?y42TdCF>4>GLtEbO(#3V6G2E2qRS$E%q(dQT5e*~*Ha z#1E)ub<&&FOm6^~eSx>Cs{FgrR&7Qtn%|Qa4)R1vLkVHrH<~M93h{a1=s6DVP88D; ze|F?3{j+UyyuJD=TU%7rctFCrs3B`vaAY;+IfULSuv^bmH{r^U{^2QHKF!>;=$iR0 z6cJZ%Vn|@JPvDe8fp^@!Fp1a z&|_YnQsWR5(!zoy^jM2nu^i4mM?*{Te;(A+-oCOKB1!+wg5lnF4-1PWd%Ukxn&?xN zmzwM_S6pScM_&ocMR8^(24qNoEAW~Gl&I!C?@Jp4@B_~ihjGlm4?lRL4Y?(rdV?hZ zJy^6sJ`sOcL7eLyIXi)lWC7p68!4FUl8=ruUHawGuafC#{i@${MU%JWV=LBg8;0E{ zxJ$EblU`Glu963^Xrr-}eO^lD*=o9;O)Sg5a|bTtj`SX`_}LW7EHr}D39rer$@-oF z-yG6zTrpc>2Qj_2Tn;1=wTP3owZC0p6YFCbNAkuaJiWM(biewX09^(AHrG*)w{8!1 zGS3YlW8)kfr!E5%s+eVs=lR-gc31ing;WG7Yb@?5?o~2+MXo$qd&!@K8Z_yDT;ISz znMvmd7x~#L@~Nwf3ywajA{y1o^4)igUi@}&US{Rwr`WZvCk**0gLF_w>ahdGdtx& zx=jllh^c8Xr;I8 zE-vsA)E7{^Qa@xH9BkVyim4ie`z)j&RNR6HDWBa$Z<)XNkbrj)udS^!-Ft7RV%z5R zXeFkzifa2&YU^ub?B%y4gx8f-%#ijW&8hKtGv~jB_Iv&GCUBp{4(YSl#G44#H#cd% z|CTK_v9j_?kNYqbt-vs@b6ItJw>U5|B&V#*&K`Tm%jxK_w@{e!C{<&@js&UZcAPlM zk;;_iT<%3r)+z-oA)0Rf!|TGr^@ZNa=7+ycf5yge`8q#6-eT>0NzqzeZ%H{FekzE1 zoehedwdEEorQi2eMzk9Urwj$6TfDGW4Sz4`2`{AX*VL7(8ClL3s$H2mC@FM2@!Z~b zA<<+Jg~(b0s8h(3c8$85&`Zcj7O97?BP}bxDw2^nt?s~huB1p%5Bs;DDp{p$w|}BN zse){njkZ}i>A5nmaM(i@Yqd1KLhUEnEon=J2lueJ5gEr==9O~ zA^swN6zvSv`SDc4m%GWmG4$NV^5mEGJofJV#+t(sh-_07?A5QixSb;&KR*>Q2x+C5 zo(|Mpj}DJqURb&rHC|Nz1P5@MIOFE-M_y{Gmig~RL@_<{sv45h+MPj5g!$SM8z;PN zzKTC^_>@MQR4{0zX;#r|$li+=*A>LN+nj5E{o8IB5_?1js%QK|z}l(eqMX)&R;%+Z 
zy`TBa+}xHi%MHf_dq9>j-T~*2wgl|to+r!2ks1vzCPsEs3vWuw|3)n@D*6H`xO392 zm(ls!^sY93L~FCnu+@NSx#ey71NGBa29ogWAxu0oovA8m>E`^Bl7n52!humP;Y>_^!=bNoBgE;HCuDvPvFuWDG|XR z1}IYlLkqvV>p5WWXdndzHxJvVkB9TlC8eb!1FpM=Ectc*HOT3ml*oM@ceFTYeL<1% z2nYxj4KNg9MtUy|kBw`rJ6?AW7np7Ljvl+O2qEgr-c?UA`%1ij3FG}mdYTs|xx({4 zJq!b?pIdDjrwwZpt8LX>)gUk#8E`w~T`aE4RU57Vc}yTt_>S4lBZ&N#-nPNab_oyj zjA=J)ILR$Ei;IfPM++{c78r4Vf;_!!$z!6(_)B=JBMmv-?m|P=89jW>`xl4?D=Xip z*XUV4_5LAC<#Z@Yb_yR(W}jXXFRYTijw$3*ehfgh7il(HdEG5s2Zu?Vv(kf> z=HkAz<2s_c0uoXayZD?aXE^REdybN$O+CDG)_Dw4A?BX?_fS_P%iYqf{c5>$Q z)o#tyhQMfq#G5b%B)dIXM}^%ptzRXQ*x(8V9IC(A$@N;-f}O)VT~E< zWof6Nn80DZb+fQA?IM6G5gQ)v?5;KZKh+7f_a9vLne5CBrdZC(3$tbu2LH} zxpoExPZ@D!rj7v@v+u-Z>utTS$9TOmfh}}KP8%H zQuDryfEK5-yi8egTU%Q*ngCroXy&V`&Rn54ZD{?Nprl?hh>vr3=is__?V?uj5D-X~ zmrqA{bQ0&P8IY2m3dx1z+ua?tRoACrAx{x|;c!4bWUz+1H{KV;6ozGQZ;znq@~V1d z;_h-E5PKQ!rYHyEsrU({Z=?MA;_0=zK_wa6mGA80PkD+;*{0SHMH>{W_L5CKJv|?W zGt@m$D2pDbu0A-QA6b{;qbYq^5C5vZ-}?Y}J0m0XtQ4V!3v~1iD#a8XTvE+yHNLGZ zd*v(AaJaA))l#h?LVzeF;+>FH58w3E5pBcShCx@-t+0}rg9B^n&#(=$_wRRGAqN4KLx#MFzB4l09Y{6Cz#!9^bxYkd$&I-4HL3wS5kGdU_(@C{Y>X8wtwpP)iJ7ThnKPMlAf35iEoa zxINtgV>v4oq5V{*U8dE@L_;HSWAZJDxT!Gozw)uwd@dt-pQev0{z`skX7D4v@CP7@ zz~}u^C$>#l{0!TcFmSmHH0*)+=k?QjJ$DWdX=!Odgiqms3No|RCT?n8&*;^JcCY$z1X z^4~R~wq4~7*`Ta}@}6Vyc5lz3TC+=J^PoV19B@}in8X*?iF}rS532jJ7_1oxKxO#! zzzEc%^Z7?#9@49Af_->}p&&00xVON)X4g&6gb{HiUe?OXzou-Upr_s!Ss~-ks-&gd zYz4H2f8(0f*47&I{e1l6jT6|lJD*OglgnkN-IB(3X~IteTki{lnwbSniJu1q?*j$` zpeZz95!7idt@i%To;hlxrGg%$`W$Hq9Fk1RYEQo$6Z&z&UP?>-M9inOoBq;ul)bpDxHD%H^w z;4?;-7Y4cpz=7;=zoJWRvg-<#>ZI+YdH%<{Ptx{s`f!H#+Oaf2?fF_CdU|ZElAgfR zlV{WQvDU}Iu|kw2P#vYk`S{+HJPeiGpB)ewI(-E1ze_7cVl@aD-Rvwl3g z=m9u|LCo_$GBLB8H?b)EOYYOdxewq74=2=g_c&&;rd6-K7QQ>D?h8MVkZNeMH5mIH z`_U$jcA^tNF{nnS@{@dW{zKcAxK!y;#B#{wIU4u);dhZ32Fa^7F@@xf^?>jW-4rgz zJ6TiHK#bJ%TfmZUYG6u3Ib9s{bRJJG|N@CR{(VNMY6^g^nMJgJr+n=^wJYd-10< znlLbonTbhddJLP&lQZQ^hLXfir_C+$l$PnJw^#*FA$p<1g{G`@_;b%a>5*%FgcX^urS^LTANL(C_)0LBSSXey| zn)zmeq%qJ7O!yc4y!iQNU6U}7TlKo`^*c8{Pukn~NZ86#b`#UYP<+`IL&EX#wl%2p zAKD|#_DFexJ~vEAO${$q8tEILh^VWoo^b5#?C8El%tSneorbj%d7iHIfKfpu$^!Aa z=EypCQUDDCxJ&3#FK*rAss^icHc!0sC@JC}`x?ch;2>p%+R;>!b}@%-YiBo>?f;+6(B2!b(Kd@jy>YL4te#d_uwOU_zkDWV`%B7*I3RrLh&~B`C#CH_rhRs^| z?uW=T~J(<(dwljvyZLs%cxiX9`aN7|M=Cvc54AW z`j0p3;nRToxT@v%5(2~{CPN8Kmv!Etfj(Pl1nLQZ8NLiyGOD!7ht7b36i2Vc1)ZWb zG1*osR-SadCg9l(`S}yjzhbljL9PQZBLSxl;T!OI#w{T)ZvhMR6DQ|Fo43xF!|jcY z2f!RY-x_8CA0VDNrw{C^C@%-JfUUM>r^6&)-{%4^AL0R%95DUIw@1@mc>t$09T6Sh z(A->IU43o^X@JZM@XWS{63JVxK(OM%L+rh^e)pa1M$4}s5boU}5rFcOdj(Rf_`%In z7r^-{AmM0oI<#|eC~4_gSa<*ux`%WyM>%fIsOV@oKp}jC?iDd1lUJ{4WoLH=827o? 
zuY9Prd;AL`vYOnlOCuxYKmwgJ(F(j$lzg@v(B{Po_##d&>FepqvC#ix9O`uiD5-d> z$@DBOX=`cm($kl&Vn)e!gu2~bn5n5HDy}1D#?b-ap}m{|b{#Y~xu995W@bITy=UZk z00V&4H>)e8)j_9^x&krLYpEvM9st|upH8#3)(L2RlG!b%O4PE8icledq(=gzoq$^f zG&i@qs#;n^FJG41ZGHg?x93P0Jg#ROf3n5q=H>`EtQ6(t)q(gXG7>u;PWVR(XpRFe zcSw&p*luOzsZM{i`^$YK2;gr0Q+-NFU0z>1+uN@nr$j_Vq@;iiQAhNxa6R7wi4qv> z1qq+W)=(m#Pyp&*A?-)+`zyNm!CxdCv^d3pN(9tCiXUpOH2nSj!TVXT5+?JMal2g< z08&=;z($*cy}d?@8(Vl^NLW~_!|rQ=mxuFpZSL3futm_jdJggJDEPFnxCr!1yHZ}k zH}K&jD>LX(@mX19)YSQ4>{Fe*ygYL#^zEBBr!LKaUz?ec?UUIZfm>h1r)Sr?SNmmS6A-1uMGMsK>Wf!i|2VprC-Dl9iL=a4^FJiao}YqcDAtn@=u@ zzl4X_G%Rp&1F7usOvU=iYS(kb7pCQG0=5dk=Act0fNYbPkT78ZL=@}b)chVCR4Gvv z$dnyP=jS&aBL$?Lr>4`>Q*zOt1+%xnc))(*vRfp4lL4|{(EWN<{#-D4|6+F>_`(`k z03e7|sFwKnJR>L12JAF1u$b)ZS3nl^;0$i;*RM!PO@&_N|1kv?`}Tq6-Me@41w@bx zuvHLQps{i%hZzBt9n^ZmuA$0!cn+vJ0Jac7rN(4WfxHE1!Vb5$RovHsq-7XL>L&k* zoQE*YNJvPY0Q(mRVIs-JrVb=?<=roK^eilB4;TVv2kj2$*bEHTfgEJoB6ECKlYY9l zH%}p78uIeZn@K>g11oU4EF~_^2=rIG2M|c?mp2cXwTintJ3z{MXFCqS41&y)HO zK*5>a+S+=E2OAFflau4)O~AxK{8FafV#+!POdRs9ZyoeC52kgzf5B*gq!yTf)YIIv zBec&ys(~C4>g~%(DE%)XIh=>ele4hc1F`HwI>1~I+ykJ6TM6fXQUyZOP|}w*T zMcoDDI}C1aZUO>=^T0efQYxygnF<4tr4)TKVPR%ovugm#R>%o>5pWk^WIbyKR^xuT zhX6TVZrjErNayo(JKGot^^B-|`5rrXg+Jz>S`21+)PyWC0OU^QtpWgHqp4-CuC9O< z&~_L@l$x=;1N`Lx=)b=UASz0T5ePd6yic%bw6wH;$H#9Cy4aR80Nw*1bPXQTA(JWl zzSAQpsiYPF_>;3CU>5_RV-xxVwop+{t}rMWK%kD-?IzG?f};r7yTgBTzXHwq{tG(q zUkWE`lG4&6!2AHbDX6R80sI2q^E5Eb{9vwTA4GRRfFrM@^iOj?$`7-ELb-L@AwY}_ z;j~>72GmjDl>lQLR6dg*EJe=2mMzxXl7j=y!BGcb3v3?1P2jCV7>~&)lz51bffOCQ z9>8gu4{QoHUA|oRgG$e8|FjxaxFL>p37=gpYe4M@1%IOfA|WCzTq>yR^Zx)hHm(H# literal 0 HcmV?d00001 diff --git a/docs/source/_static/logo/pydra_logo.jpg b/docs/source/_static/logo/pydra_logo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8cdefaad2e3420d0900ce22a6c95dcc73df8ba38 GIT binary patch literal 14825 zcmeHtcU05OwrBvsf~W`qrKwcu(gg(klp-C35IRaHq4y3pgdkv0Ktd5BC4?p=0RjP( z-VG3X@4ZL|ffqdYyziXz?mh3mx9+>^{c-kht(p1F?Ad$Io|!#+%IU!A1mK#QvZ^wG zj0^xEBYl9=VZdVm1^F4Ag%l(>M|JKDE>KZXQeC`o>C(ju7cXAAeD(4r>MPV2FVfJ` zT)BGf8tt`9m#@=ZzeYz&Uppg0c2<+(+yzp_Yt$F1NxA=ra9RtXy+GzqE=55`3m~T@ zqo5@_tp~7?ge0dRI|IPK!G()d=PyxGoFgMAl^b3IkWrA6IA5TqzHs6E844Ncmy+r{ z?S+fi@6d^O)8CM~{|7j(6nK+eRQI`?S4np-10%;%ct{%mnMfSZ82^a+zgVLUeO%+EpRIo#lI^0pb1V})`E6kK)$5|R^M9gU zxzZ%S!WC^OyCNCA^({O^>vOrQxAe0~0rzj?&r5w$FF!u?Jg$31s&mmQ{>l-}m#PiR zTRqupEF|jp)mgs2ucV&Oa&n2OO%3EYwAQTYj@@d-+v;@=N zVjx`*n2**TgThtFXeyC@EoJ%A-QKkSE6tOa_$2!J7I`Zt^z+kDwYH=L-3coGX=m|7 zF}A}RK;F7%<4ad>mXk!}RinvGVe)T{gxEHw79TT1eN6_ZF{glI1l=43-&*SWCzf1X zKxS6N1`tNHbl%=f$Ui_#ZWl4JW}Z|*sgE<^J8x2N&i+#&sZc{E_P^+gJ593erk6F> zrlBUU8OFl8E{pbh7Jnh;)(ODN-i`=WFPj&rozvf4mNefAK$u6B1(lvJGb-Lo8tLA4 zs$5OeeDI}xn8Cv)i?!o{n2Louq`p_9Fe42A&gL zRB|y_NKfNX+6%H-KQ9f6CcZP%QzGVUTyh*na`R?!_-Q72DmIAMy9CH>8Xe7NGaq_q zFQ@vNJb>@|PUiho87wbzujm@$2IE`jWxg4s(KvJ=i;7Th^I=_*gLRNWn4QsI+j!vx zUhlf40B!7I~o0^<;YoUBVCcmv>+#`=P3=>gNN6{s_2)W8Ti)rcJbwYn$ysyGjE%`irE2euS|a*X_y`8I zdfdU=MsDx&@3Z4=leTxE`%>qzG^J(RFv3E3h+Erec?j=OMJ28&!N3P@;%K)f*U7^1 z$_;op$W^@V85W-dH(|F*VNO7Bz$LWfF1X#GmRtpyiA-O6OZ%e4nF!Kj6QO3g`|;Ut zk)Xj1fnE=rA;tBDrKo4Jx|eyRRw(t zK*yo>B!EHf5#Ph{JQdVFzw7-qcoi^JrIR>R=|bqFWDTW2j=p-JRh&@b8r)A-UHez0 zzl-xm=gipz%;1?jt#yAC7n5v=g<}R|Tc`^b_^$fat*zO@#ZaUxz;tYm zS`aZ?e8P*Ec0i)x<-3^hg9)pVs9l*c93+R>5ptKVx&$y`^4d9U@@1Zc2wvQS8~c4* ze;)`;Q^`9T{Z_o@GXPm!6g}*JDMInZpD?yTZZv|3 z_JnzpiIlHxg6tB-YBK?F-TUm|mh6NVdif7u29KVY_(jxL$7LU;$yNTsZqY>q@oVoq z{3y%3TJln|2{Q#rmI^Dl;XnCiiFXk6*hI|6BR!?iFbg^q4udUZLb8ork%iR{Ib8ww z1ok>vq~A-cM0f-Ml{ou;NvrncKX6vA&I03$>Zn0}?A0no;khmMG3IHvUCKh@$1L51 
zGaXJ)SDjA*f>rfX7HU1mI$UGTNxDjyRErT0_Q8ZQ0bz&C3YkGQ%#L8(PK*`PB3mZ| z(pAE|Zxe#6U`sC7r&&0k6u!xQ>)Wo(h&F89Q!+pa%cySvHs~C9_ELP=V{BFt5{RY@ zo)0^yO8Z7EF>=>CXX-Yqs6FA;Szv=5-tHM!jJC>3P?|x_DMxnbwjv<3@s&Yry*t1w zu`yJ$`2r6hc~OF1h59aVmf74noS!b;j8>XxAa(WdeS7|zO@5w@H)|1|is}M8l7mTI zh}_Je$7XFU-^V{*wQ|N+6*EIdKY*`J7$2n+((6qzU20hY`cHunZnO(c!kyJV8t<}bN;JC|-nP%(%zNf-I*=kS?kI@dxvTI_AvS^IVcqs1f(pbkj zR#a)Hj?{&=TGzw#a3O7LK?+#P&lq1gtct1tyfLDQu1AZ@N6>RFRPljH&GliR{*Py7sc48p=`SE?_?jdm$ui;3 z>B!Nw5brX@E>`C{;1G!hJ)xp~u*A+^bew4g2m_wl`8^Zk+5LUJ8H#qzEm@;4`+ao@ ze4k-h|9m;9#nFpZ!?M2BTk0lsMGPjh#Kwr4)+Tjz6Lql@Rfa6(vMqX=Q$SqhvU|cc zP}3htM&RJ@tOEH_{P`G1T#pDkc_!{YdmJjVkXuygI8>XZprgICGmoE2qP5D-sXpFs zTPrnM(~ORYT8lOu3E#I?)CWZ?V_MLVa4PfP#)lMK>KL<`oN5?kvM|jPXXYn)4{vtc z&b+JZ>JlG#TAOd9+gI+WgD!hKmN*<|fj9Shm=jT!{2<0yWH#3${vO<@9^2FC?X@!f zrjG^c>k1!Jp{_CMyw&OPC(x>%JzE9Dy7+Nd)yY%8{O|IwvkqS!6l`ga{4SHt@1mowSsug)$AX#*>MwQAihRy_N?h` znlzZz_LH^WCIJO}} z30Pu`Wx$*&f0mEJ{L^fU_k0ho_S==3Zc>hg)mf*u^Bug$UA!3aW4=WOs|2U@s#;@(eQD{nXCp>mcYN(%8^Rk45K{CFi7cU?w z?vLSA1S#gC70qtxZ2$Cd)O0VxE$ubaemv_SGJ^BBR30Ps!Eyy?==d|eD@RvvUcNaH zE*Jdu(hdb@+XMB-uf#YmSy5-`wW(O*e~h4SYV~>+!fp<75W;5_!F&>)O8T0lN$Q-R zFS*z$m?tc%H@2Kvw=a70KFSqCsUDw*M8DNDMG>KR3)HZj&OiXR?L{x z%tPLgAfI=Q*H#3D_usezf1U~D)R8Ee+#VIT#dJ9!lwZc!ul#xC;JkmPV0RoNC|Xe~ zn;V@6r;C79VU#>fS%Efv59$WLgl?QzPO8PF5+VeDn4-q2lHslO?h-9C9$u5~USHt4 z{-MpGXT(YH&pZ&(s?3etSF(?tcRn>2&Vn+&;1djDD|^^mLVs8NOYjd~gBjHL1s|Y~{u)55*Uz~PryrD**Ksyg)5tOKDLEZXbC1&H#Ocl-H=NyNw z`U$oB-3dc4IS%ah#JCAR<|*3MkTq^CjklhOO3f0wn==rtKLzHiQFO<|=ueWv#1up} zwOnjThDppV-vr`f7>iRoEE558K$EM!L;*o2%uG>M%;<$ajKmDpJ*w_OA0Bkcy26Gs zsNu=MqOg=b{(~~!Ft1DzFKDv=VGRASMSO`k!rPbTi4Ok_$?$8Ja>q^JZ3UNx*a#|U z`*P#m-Q~9N4&@z1P4QDehe#*r6wvJ=Z%k=Gwv&;IPD6)Pzsxd*ARL&*#UAZgO$UGt zDhjmhQ79NJnV@qe4r||eN#m$F5SS$18JSfx^d+_ej{IU-5Mzk`qUpCVI31U>fn+~; zIGE;9Qik;u@^{jU7IET&^ct-ab1h5Ia<^`|bwjVqOK8ujva5@Rbn0*{&{=slDvC*Z zC%$DF6I|4_Y%{hU%K#S@L^%tCY^Z^Z2x*pBEu!MglJyc%g8BzJMykt+LNDyAB-7yZ z)e#1!Ji~R{!j7ngXcNz?li3j+J?0l7O!G^y$b9Z(0tkS*4KUQIGs%Z+^^MicRmOctA*HQ2@q#d`ykMK z24%#llC6wzZI0;hQkqROxCW3B91yEt<_(ZKzv{}oa_RZ_^XWUUWNFVA_K}&V0rp(u z$(syMuF`@Fvd(cC1$96n?Y(DM}>3uOe8(pUN49*%zmbM zsx9g;yJ&5dn!AZ!I)7Nc5?PGtpe|&d`rfPiq~;bghYI^I`?Nn+aeJn- ztgrao*xY6)>Gy4?-`vfv8gVQ@$0A|H(VRwn3=)a0`_|U>dq|Er*t-d3BwCt0qn6B9 zHo3^g{LUa-fS?}HC``oc`>+Pl8~12`0zH$exX70=yRlTQ!^2rH5wWg4hta*-flE=;@&U3^`H5G-`OMo{kb;I_O>@O?Oa4RHZ518gbNIqXyQJ| z96ma-ndz*4AHjuaiJKLEV|sIl*(a}2DTFVdi**H88aI~6pz$|G4e;oAEtnhp`Lke% z1|{8Zb0^=c4=Upu3h=_kLeRJzqjT5(V5(NNc1KFQgLrtvfoN%^(I`frO8)0V$I16F z<`7#iYRmS^AhiKz#cc?#?T?0rAL#7EMc|D^>7!IC332_9Yg(=n-aQp!Dy3H*O>1Q4 zD0A+$LA%E}M)DmcHu==m+0h=8j)cYnZGK<4AK!}(FB?`HTya3q_juA~-iY<`hDL)m z{0WiJ?g36Rg~4Ev9YDbT3&*67V)=Y4pT_N$^DXnR2PQCqm4K01L(Yk?{3xZFzM@5~ z8O3GKpF$~+bA7|w(kbsNMXjppzvOr_8s>MF4Y4%1FhiH(ZogPnaW{|Z_C$YhoKSA9OY6)31X3Q_qYw)n8!843wu4i zlCMsBI4~$0zhyrFva15+AIg}gr6DmtdWHo{-**!T_07B!Z^E0WqQaw~OuaKviX>Q4 zq*y!0JPw?C-ANn{j&{Af0$^oWxjE(a5ufyGx145|^4|K_&G+9@!e8|-0}6G-9>k@k z3u{^8wEvLbeLF)INquxJPp&BtetLWg=1?1~3BcCgeEFMCuR z%esp=UdBgLJ-)1|^`x&TJPH!{BsBO30Pt&%`VD1PSU^fQHZ?jrUP@C=NijeA?iUDT zUb<6X8AFFa7lfbV>&#zczBl^@q?AA@zx=8HiHtITUb~~g?h|6q8>0I{jM7he2DMRI zza0pz6q8G7=W?9NA=$@ILUn2=R6b4}>*Y)pIIYFr3HIOxOo|^MVExdSor-WR*dF$JZZt9*ES@g3fKM;r_egU_^xLahXoifnZ!@^nMmGIJ_MH;$`u#UZx*gWX+R zvTS>jL! 
zP0^}f2hhW3cas5ZnT408B4=gME1#SfU0=i)jtUC0#1^rI!}388J}45oxS4jVWZf_3 z6p%LXityoBN3K%qkzM9NhfO5c=4eCLUXJQpes0jgeJdTQKHW9k(3bcTZ&vw)r|*2n5j*FpQeOX3F%HxXL6htZDpZ5TEQ#t8#*`%N~=bl|4~U zg^NqRz)>bErxCp2nIxV19~XkD76x{mOH(z$`u5c^Rk!oy^ zpVMyWmc8lCxX5!cWv0+!xyh9&t>{s50StfRxNkvJw3R6?psYxOsB;CL_L9kvBP%Nc zu0%%*FNz}MLAb-iBYML_g8=hZF55Yp#amXm1n|*=V}*%@!EeW@*Elw=TX|RJBx2XV zf=}5?Dxtax()& zkoV}K6u7S6+uof1CXzE&uDP!Wg+QYVITf|<(hHY0j@>AOTz0T{?HV;zFdi3=i-wIl z#~9*2dODl&L-Z6q;@Z9>GFOP8#@%D4Ko`Q!@8o-oL_tBOxkz>%2^}eeO9R}lacZ(ux?dOp$W02n1IWE+N=cKa-JGstUQv=>%(eg+GQ=N`Y2@7a7i z6nrl$8P>(3{r<-HY^LY3HU+q1^f(uWFk-}kd@|1I0%lj<6=o=sYRiIEMHz5h)#6TV zc-m_zpckH9q-Sf39j(YrDkon}FnmQlX{;VJStRLznm{>rJejr5`CgdVu^dH*5axPH zEh|3?o0=7qouo|p?A+mIMoKMC;3pF+UVIJuKsmG#e+>~`=aP->Y0=fzFRs3C1!bDg zwL9CQNf2lyezG|`wt@K&b9PPIc1cMx#MZ2e#7+B=@eckqkLjam%PBRcOR{e|zKR;! z8}EMag>fs6(FY{2+1G=J;M8ohNbv!rV9&-|vm{;1aOCIL_qwgq0KzFU@2uGJR{!Gz z-T!bqRT{PBB#3aGU7CDbDE+>4kJ?PX6y$nz)%+ydek?D*H;EkM=>d{Z(zjqw@thC= zDuU?uwfET8aIR8I;b`8C&uenyku`-jE;9TJ>M?^|X5$gF%WUgaR2&I;0V&4;{C)(B zN2bA@vNd#VeR7=?v}fT7KCWt^FkMK&j`cVCJXtuY(tL<{>zF6_N5ZX===J)aJ24)i z_3otbPwGuRQ-1zU6}6Vqq;R~yxGV*@&DFFrUAg%HZ#6O@!h*LN#kg;Y-m`^1M->H= zR~_-%BaH6YsCcwd681U^OGI5UWU?|G3g?(y#D zlSY5vDd4S1?bmC+ZRQ0fd>MPc_^w^BeCdyj{wi$lu35BW;8U4VZg`&vqlJ5~7R*QZ zn?}tAdV9d-&mvpQbaSa(wgL@1-JlT@r!o^(@feSh{e?yLOshCkGugwZ%f%euP%GL4tz*D zWO*1~tg6<6C}|AG2fBGiodQr-5?1yi&F_@zjIa1y;Lk7Pv(LS|l;$tbI{`=SK563N zf+EiDX<+A9{^{im`6r(N-{xnX^?xkygo05w`yTtcw-xjm%emi{aRvY`ELJJp?0+T~ zS@Lwx*~&r!lpb%RB7BHD{Dhjdh~q%{XE%m=)B~s^N481|$kJOcO1`QxL>1h}Zh4_I zv3*_FpBy={Fe&E0HS?EdJ4uRIC4kUL`7}`r3Xe=*M zuO^(i@0{^aZt%xypsSs|(^$z(;4gIPN1x)QZm_?#kp#D$PuRPlp~%AgW~if$UPdre zg8>^ z*Gwa!Ki@{idEZ4;rAr*=svP`uS?VYC3u--NfB6vPQ})a2SVE{{n6c7cgUh=?u>JMd zBSznN;t#>kIGomnFCnT8Jtdm{n)f};3#tQgMTRF#sO*xOK$rcKl>2!;H2Qjj^xr2YTMk zE7J`JntrMVfnr{WXkHsd6@5KugSLD`%k?K_EF|yB-^+{aC4hoZXXw2ZxyhP)@p&s_ z-)N!NR}&QPCsZ^7>2Ur201U^@pFEc>Z(6$;!YP{71V2!DBbszoQhvHlvKbX`p!{wv zVtXppOK~PT9<0hyGv@yy89-hv)nhFOTBd$3m%idj10V1LUmeeYg3LdDP;F4IL_kA#5Qn;Cqr-1rSN}HB6Q=|Tbl3Un z(1g?geO+~3<6|ms_Nx|ot?*M@h;p^{_(aB+c6Ht+vjQKbs2f(<)V0AwB}AF9lX3Y~0rm>%*(KlqSdI{>AcXTq9@37Hj z5x7NbuBu9aP(GJWmePl(wm!c0O`|B>;Yb*^5hSBBnOhX61>b+J*z!>H0k&TJzKVz| zRsbhzM;LU`mtqS0-s>5Cb|_0a1`aPECK1%oeFydl~JFn=~F-ulbE$xp=cfZJ;5y+D}Gyc6uQD9tK$1|kbtNN zd*Hny+SUU@)T)&ar;?`loVS%LSOnOhui>jaZ!31*gVq$J(ezrRqZFIewwj)E@8b=& z@xUxEm|B1D5tlaolbOUj${Vgvjzd0p;}&O@Icq2R>om>qW3s6m_l-A33&z%2TWl)( z8;XawGDEt7N$STD-N0dWdC%8z_gT)ANk+X_v7cxj9Edr-N3R8-&K}90DuY#qn4GY> zkk>x^t#5$7_hDX?x`DvAqesTP_Z`;19(RnH_Clm^lhDjecwj-m{y{1zTLDh(P$B`y0tE=2?qiP>zd))ATk03Mh+ zy!tx!{5gL$RU<=rfu#B>ODS->7!n~MDq%cBl*V|P={A#|6_8FP!qaplhncTirM%xn zy5%b^lbPFjI7*$wgB1hp?Yus$}1RVz35wvN!E2 zQ4h1{E6|T0R?5P_S`SkbS!PkOT=SRh`=n_L32UJKH%#FLH0S=|g~I=45xhE$=*JlD zenHF-boKQ0$s>GTJ2Hl29U6gyuR@)GUI!*VpR-;fCEbqB3&iwP@aXRqZAxF~Wo^;z zuteh}Tp!;C0NzV87Ad>V8&K8zmAC1w_)=dsT4VHEmuNTO^6#!aV7R0#ENwb3_QLu^ zGj$mRmbftyQwylbZy6IB=*JcxM%`vO?~Q$Gp6?Y8*_GbLZ2#2L>Z>r$Wgg0RXU`U& z$!$NP6Yty)ia^)}ieL3*Yq$7;I|XQff;`Hqir9kg436E1k$>;=Vx{4jx^TMz+|N22 zzO{JX*Yd&PA7kfB7`KILdVm(DKJgq~8LX4Pi~gkHHfDHMIgC^$9y~D2H};i@|E8s{QH&<_s?;;4eP-G#DK?OyMpcmkf`pS_4de9 z^gO+&S#M!fG6HA}Dzx!;VW z-^kCe5c|)M@?@GbL?cz%5XCoo6!l#y`ZYnI$IpIqCS97~cWzE=LcaN` z+qIbzWN=4%drIEg?uB8|SMDwjBasWSYQ?eOSXX`Dm(bkg*3;bdQ>8M2A^9MUNp`4wK)c;;53#xXm5tc=J=jX2 zsy_zvf_;PG-1kJQSNZoTD2DJ+Uk7rMR9!L_XOhw*@S4+codG1PKWp*sY9NhpN)n6X zWT^osvSF|#LBW#|8bs|>N@Q#6?{?M8h=Jd%(|+J&;lt4nQZo04B5yP8{S8Y6ueaV= z4L2;0oa?a<;@6;YSp`f>($x!VT?^R}3;UzrmolNH9=e|T`exF|7d#~WpZ|^vgUu_+ zVP{n+&A9ZP!~FfGRXO6NxTCEQtGXTfASKc(iL}QfeWS9)+_t7EzL)_QUk0=aPBkfv 
zKau7EIwvdT+-4iSJ;F!Z+sO3w@6MN&gq9A^bI;1x*Z;{!#)Qv}di|?0#INucl)ynaF(#2ch){Leo&4WRI%MU{d%vM8szc%l zz{rhN3&xM9@SnWzSQXyod1Cjs&h8n;U*HxW0^u*?0OgyvzW1<6RUlTHsZS&aT>0!~8zD$>6b|97iUKW6Z=RSx#ucg6=`hUcQ+R-9U#mbI3j_HHiHySr_Aw)c!L zW~FLX2bPQYt}&c#R6=$q8?1aG$kfhLK(5P?0Wme;`LUTrX)=DDB_%}5c{f_C=F1Q^ zZ)^DK($aB;PnEmZG3zR+NCo3$G>^SSBpy9k_nlcjlpT_mIf;P`m2^2Oa72U6URXEPxe>x$+H z`@Dpw_F`jyw~?(W^#(-eQY zv0T?!@BNuOEf5bQy*4g!2zfL@#`k)I~%^EN2wi#jP+Iixdd)t!U1bsJ1ZXsA z?U#zM;ocvk0UiatZV7PbK}QQ?ze=vm=woXIdF2pO~rwA=_@_6iDZhLz6n?|h3;Ce!GD^`pNRkQnEeY5^?zyO*r+6H z4HZOIYCJwo)oJ@+Jv%;EWKT^PF&9!-@OeS9D}DGda%dh;{BQ**XH{1=Gt}pc6ay#R+YfMvT7{}iZvNDj?MHvXRWph!JNm+FQ3Z)=8#G}?sBPfPqigtidEX4xlHkcPC zmqjxmc36z5N)Q2^ScKAU=s|=+M@0+N;swgRrIbrAPd@oc_e`oJx(3sof@Nk?m=LJnTsREhJ%-8LR(ZaIAH*}l^<-v<#_ za)ZY>(l-}2-EwhqY{Yr~dP#da{_8zSIoVorIRjt1Y!0fM?%3YOahF_; zCfDoxNu^GZHgEZRsr)*FNYQr+ILc&9_5}sO!bGg*E(k#jpY}sH2?KM|zJ+dTyIbkdKZUv4fA7$WNF_`Yv2&&a2R^g6 zA&q5tkn;QOMvh8RaE~B-JT%l4-mDoY;B5|*p)7+U2%C~nf>amf70ZgKZF7BeYq!~))3mLSW1wN!%%?7^He+$r4*ZtZ9TZQbdcB@j!&nfV= zP<{Z%IM)eb*R^@<0LuFKB5~adIenB!ULF)YXn|y?S3mB3TNkMw*Y}7_U4*i$tZjTH z>Xk`!W<=(1Q3?uN^rV(+c9MCM{`LIQSU=R9do=S2S9cBrUM!e1RHiOX4SK7Z?yv(K zYg=k=lP`@mby7@&y^yN3%1&agXisM?zR^rVTH#lSev72yabsV@!SFKGX>X+DwA$8& z7+MrAAfxIXii(_OK!ck!3hF2`((`5b*>~2od@fchhp5SX)1%%{)kP%hd@}<-=}Pr{ zX&mkiwUi@~;wHuw*a_`Df#melT#a}4$$9&v`g}QlmWb@iOIRlV*A6mt84hn*P z5q8VRd}|#32sZWJ2BW#`#afcu&o?6lFUQH<2oKM$$z4Zpz1t8%FCyJA!)~0Ug$+O{ zBs-e27N!I*natTY6N>8fbWv;8JuxLv%2$bmX43}0N`-1WrC4`=pUOUu_xf@Gut$p8 zW}KSt=*uuoBydNaJVZ?~LmA&u#9OEoT9UR!RJ~nmm^r6KN%r;gs;F-*BzT$2GOKY% zG69%!LE8L-N-dUYF}b_~=`8awXOxu77{}EU5Jw2ktNJ)dp7+{;SVZ&C$e=T7KZZrv z!Q0qKwWPc2t9b)k)%?n)zs!aWi$x?jfy*Gn6~5Uo5U;bD!q&fa%6d=2ZuA&J&dK}P zvow#^huLzmr`)b)`f?AGB@HOQDIHqN2AC3`Vv>*ObppYYZkg>}f2W+P0js^nK8*Tp zH*^Jp@cJIL?pf~|ptxOo&y|{+IR52SUEe}UmE9x5$LxUjo$#qw>);Tp?~d6)_7?rJ zJmPG_``?zc#7gF}&MD>-c>VgC>j6}6-Bf&0X%`e|02Q#|>BLxf8x+VzK0<-6WnN>- z{df;~`l&*6&x1s1yBmEkwJ3hvSvG2G + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000000..2abd3c8c2f --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,396 @@ +# -*- coding: utf-8 -*- +# +# Pype9 documentation build configuration file, created by +# sphinx-quickstart on Thu Mar 30 21:41:02 2017. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. +from __future__ import print_function +import typing as ty +import datetime + +from pydra.engine import __version__ # noqa + + +authors = [("Nipype developers", "neuroimaging@python.org")] + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use op.abspath to make it absolute, like shown here. +# sys.path.insert(0, op.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.mathjax", + "nbsphinx", + "sphinx.ext.viewcode", + "sphinx.ext.napoleon", + "sphinx.ext.autosectionlabel", + "sphinxarg.ext", + "sphinx_click.ext", + "numpydoc", +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = ".rst" + +# The encoding of source files. +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = "Pydra" +author = ", ".join(a for a, _ in authors) +copyright = "{}, {}".format(datetime.datetime.now().year, author) + + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = ".".join(__version__.split(".")[:2]) +# The full version, including alpha/beta/rc tags. +release = __version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# today = '' +# Else, today_fmt is used as the format for a strftime call. +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns: ty.List[str] = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = "lovelace" +pygments_dark_style = "fruity" + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = "furo" + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +html_theme_options = { + "light_css_variables": { + "color-brand-primary": "#0e691b", + "color-brand-content": "#0e691b", + }, + "dark_css_variables": { + "color-brand-primary": "#5db754", + "color-brand-content": "#5db754", + }, +} + +# Add any paths that contain custom themes here, relative to this directory. 
+# html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +html_title = "Pydra v{}".format(__version__) + +# A shorter title for the navigation bar. Default is the same as html_title. +# html_short_title = 'Pydra v' + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = "_static/logo/pydra_logo.png" + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +# html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +# html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# html_additional_pages = {} + +# If false, no module index is generated. +# html_domain_indices = True + +# If false, no index is generated. +# html_use_index = True + +# If true, the index is split into individual pages for each letter. +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# html_search_scorer = 'scorer.js' + +language = "English" + +# Output file base name for HTML help builder. +htmlhelp_basename = "Pydra" + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + "papersize": "a4paper", + # The font size ('10pt', '11pt' or '12pt'). + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. 
+ # 'preamble': '', + # Latex figure (float) alignment + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, "pydra.tex", "Pydra Documentation", author, "manual"), +] + +# Autodoc settings +autodoc_default_options = { + "undoc-members": True, + "show-inheritance": True, +} + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# latex_use_parts = False + +# If true, show page references after internal links. +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# latex_appendices = [] + +# If false, no module index is generated. +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [(master_doc, "pydra", "Pydra Documentation", [author], 1)] + +# If true, show URL addresses after external links. +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ( + master_doc, + "Pydra", + "Pydra Documentation", + author, + "Pydra", + "Archive-centered analysis of neuroimaging data", + "Miscellaneous", + ), +] + +# Documents to append as an appendix to all manuals. +# texinfo_appendices = [] + +# If false, no module index is generated. +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# texinfo_no_detailmenu = False + + +# -- Options for Epub output ---------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = project +epub_author = author +epub_publisher = author +epub_copyright = copyright + +# The basename for the epub file. It defaults to the project name. +# epub_basename = project + +# The HTML theme for the epub output. Since the default themes are not optimized +# for small screen space, using the same theme for HTML and epub output is +# usually not wise. This defaults to 'epub', a theme designed to save visual +# space. +# epub_theme = 'epub' + +# The language of the text. It defaults to the language option +# or 'en' if the language is not set. +# epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +# epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +# epub_identifier = '' + +# A unique identification for the text. +# epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +# epub_cover = () + +# A sequence of (type, uri, title) tuples for the guide element of content.opf. +# epub_guide = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. 
+# epub_pre_files = [] + +# HTML files that should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +# epub_post_files = [] + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ["search.html"] + +# The depth of the table of contents in toc.ncx. +# epub_tocdepth = 3 + +# Allow duplicate toc entries. +# epub_tocdup = True + +# Choose between 'default' and 'includehidden'. +# epub_tocscope = 'default' + +# Fix unsupported image types using the Pillow. +# epub_fix_images = False + +# Scale large images. +# epub_max_image_width = 0 + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# epub_show_urls = 'inline' + +# If false, no index is generated. +# epub_use_index = True + + +# Example configuration for intersphinx: refer to the Python standard library. +intersphinx_mapping = {"python": ("https://docs.python.org/", None)} + +numpydoc_show_class_members = False diff --git a/docs/source/explanation/hashing-caching.rst b/docs/source/explanation/hashing-caching.rst new file mode 100644 index 0000000000..4945a158f9 --- /dev/null +++ b/docs/source/explanation/hashing-caching.rst @@ -0,0 +1,176 @@ +Hashing and Caching +=================== + +Work in progress.... + + + + + + + +## Overrides + + + + +## wrap_function_types + +def func(a: int) -> int: + """Sample function with inputs and outputs""" + return a * 2 + +SampleSpec = python.define( + func, + inputs={"a": float}, + outputs={"b": float}, +) + + + + + + + +## implicit_outputs_from_return_stmt + +@python.define +def SampleSpec(a: int, b: float) -> tuple[float, float]: + """Sample function for testing""" + c = a + b + d = a * b + return c, d + + +## Function docstr + + +@python.define(outputs=["c", "d"]) +def SampleSpec(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + :param a: First input to be inputted + :param b: Second input + :return c: Sum of a and b + :return d: product of a and b + """ + return a + b, a * b + +@python.define(outputs=["c", "d"]) +def SampleSpec(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Args: + a: First input + to be inputted + b: Second input + + Returns: + c: Sum of a and b + d: Product of a and b + """ + return a + b, a * b + +@python.define( + outputs=["c", "d"] +) # Could potentiall read output names from doc-string instead +def SampleSpec(a: int, b: float) -> tuple[float, float]: + """Sample function for testing + + Parameters + ---------- + a: int + First input + to be inputted + b: float + Second input + + Returns + ------- + c : int + Sum of a and b + d : float + Product of a and b + """ + return a + b, a * b + + +## Canonical (dataclass-style) form + +@python.define +class SampleSpec: + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float = 2.0 + + class Outputs: + """ + Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + + +## With inheritance + +@python.define +class SampleSpec(PythonSpec["SampleSpec.Outputs"]): + """Sample class for testing + + Args: + a: First input + to be inputted + b: Second input + """ + + a: int + b: float + + class Outputs: + """ + Args: + c: Sum of a and b + d: Product of a and b + """ + + c: float + d: float + + @staticmethod + def function(a, b): + return a + b, a * b + + +## no_auto_attribs + +@python.define(auto_attribs=False) +class SampleSpec: + 
a: int = python.arg(help_string="First input to be inputted") + b: float = python.arg(help_string="Second input") + + x: int + + class Outputs: + c: float = python.out(help_string="Sum of a and b") + d: float = python.out(help_string="Product of a and b") + + y: str + + @staticmethod + def function(a, b): + return a + b, a * b diff --git a/docs/source/explanation/lazy-evaluation.rst b/docs/source/explanation/lazy-evaluation.rst new file mode 100644 index 0000000000..0d2b45fd72 --- /dev/null +++ b/docs/source/explanation/lazy-evaluation.rst @@ -0,0 +1,4 @@ +Lazy Execution +============== + +Work in progress.... diff --git a/docs/source/explanation/provenance.rst b/docs/source/explanation/provenance.rst new file mode 100644 index 0000000000..6f97a3ed96 --- /dev/null +++ b/docs/source/explanation/provenance.rst @@ -0,0 +1,4 @@ +Provenance +========== + +Work in progress.... diff --git a/docs/source/howto/create-task-package.rst b/docs/source/howto/create-task-package.rst new file mode 100644 index 0000000000..25cf3ccbbe --- /dev/null +++ b/docs/source/howto/create-task-package.rst @@ -0,0 +1,4 @@ +Creating a Task Package +======================= + +Work in progress.... diff --git a/docs/source/howto/install.rst b/docs/source/howto/install.rst new file mode 100644 index 0000000000..80c058b0bd --- /dev/null +++ b/docs/source/howto/install.rst @@ -0,0 +1,4 @@ +Installation +============ + +Work in progress.... diff --git a/docs/source/howto/port-from-nipype.rst b/docs/source/howto/port-from-nipype.rst new file mode 100644 index 0000000000..31a1e3ed0e --- /dev/null +++ b/docs/source/howto/port-from-nipype.rst @@ -0,0 +1,4 @@ +Porting tasks from Nipype +========================= + +Work in progress.... diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000000..7fa5264fb6 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,116 @@ +.. _home: + +Pydra +===== + +Pydra is a new lightweight dataflow engine written in Python. +Pydra is developed as an open-source project in the neuroimaging community, +but it is designed as a general-purpose dataflow engine to support any scientific domain. + +Scientific workflows often require sophisticated analyses that encompass a large collection +of algorithms. +The algorithms, that were originally not necessarily designed to work together, +and were written by different authors. +Some may be written in Python, while others might require calling external programs. +It is a common practice to create semi-manual workflows that require the scientists +to handle the files and interact with partial results from algorithms and external tools. +This approach is conceptually simple and easy to implement, but the resulting workflow +is often time consuming, error-prone and difficult to share with others. +Consistency, reproducibility and scalability demand scientific workflows +to be organized into fully automated pipelines. +This was the motivation behind Pydra - a new dataflow engine written in Python. + +The Pydra package is a part of the second generation of the Nipype_ ecosystem +--- an open-source framework that provides a uniform interface to existing neuroimaging +software and facilitates interaction between different software components. +The Nipype project was born in the neuroimaging community, and has been helping scientists +build workflows for a decade, providing a uniform interface to such neuroimaging packages +as FSL_, ANTs_, AFNI_, FreeSurfer_ and SPM_. 
+This flexibility has made it an ideal basis for popular preprocessing tools, +such as fMRIPrep_ and C-PAC_. +The second generation of Nipype ecosystem is meant to provide additional flexibility +and is being developed with reproducibility, ease of use, and scalability in mind. +Pydra itself is a standalone project and is designed as a general-purpose dataflow engine +to support any scientific domain. + +The goal of Pydra is to provide a lightweight dataflow engine for computational graph construction, +manipulation, and distributed execution, as well as ensuring reproducibility of scientific pipelines. +In Pydra, a dataflow is represented as a directed acyclic graph, where each node represents a Python +function, execution of an external tool, or another reusable dataflow. +The combination of several key features makes Pydra a customizable and powerful dataflow engine: + +- Composable dataflows: Any node of a dataflow graph can be another dataflow, allowing for nested + dataflows of arbitrary depths and encouraging creating reusable dataflows. + +- Flexible semantics for creating nested loops over input sets: Any Task or dataflow can be run + over input parameter sets and the outputs can be recombined (similar concept to Map-Reduce_ model, + but Pydra extends this to graphs with nested dataflows). + +- A content-addressable global cache: Hash values are computed for each graph and each Task. + This supports reusing of previously computed and stored dataflows and Tasks. + +- Support for Python functions and external (shell) commands: Pydra can decorate and use existing + functions in Python libraries alongside external command line tools, allowing easy integration + of existing code and software. + +- Native container execution support: Any dataflow or Task can be executed in an associated container + (via Docker or Singularity) enabling greater consistency for reproducibility. + +- Auditing and provenance tracking: Pydra provides a simple JSON-LD-based message passing mechanism + to capture the dataflow execution activities as a provenance graph. These messages track inputs + and outputs of each task in a dataflow, and the resources consumed by the task. + +.. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL +.. _ANTs: http://stnava.github.io/ANTs/ +.. _AFNI: https://afni.nimh.nih.gov/ +.. _FreeSurfer: https://surfer.nmr.mgh.harvard.edu/ +.. _SPM: https://www.fil.ion.ucl.ac.uk/spm/ +.. _fMRIPrep: https://fmriprep.org/en/stable/ +.. _C-PAC: https://fcp-indi.github.io/docs/latest/index +.. _Map-Reduce: https://en.wikipedia.org/wiki/MapReduce + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + + +.. toctree:: + :maxdepth: 2 + :caption: Tutorials + :hidden: + + tutorial/execution + tutorial/task + tutorial/shell + tutorial/workflow + +.. toctree:: + :maxdepth: 2 + :caption: How-to Guides + :hidden: + + howto/install + howto/create-task-package + howto/port-from-nipype + +.. toctree:: + :maxdepth: 2 + :caption: Explanation + :hidden: + + explanation/hashing-caching + explanation/provenance + explanation/lazy-evaluation + +.. toctree:: + :maxdepth: 2 + :caption: Reference + :hidden: + + reference/api diff --git a/docs/source/reference/api.rst b/docs/source/reference/api.rst new file mode 100644 index 0000000000..fed18c54ec --- /dev/null +++ b/docs/source/reference/api.rst @@ -0,0 +1,27 @@ +Library API (application programmer interface) +============================================== + +.. 
automodule:: pydra.design.python + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: pydra.design.shell + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: pydra.design.workflow + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: pydra.engine.specs + :members: + :undoc-members: + :show-inheritance: + +.. automodule:: pydra.engine.task + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb new file mode 100644 index 0000000000..16d1865558 --- /dev/null +++ b/docs/source/tutorial/execution.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Executing tasks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb new file mode 100644 index 0000000000..ba74a9c786 --- /dev/null +++ b/docs/source/tutorial/shell.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Designing shell tasks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/tutorial/task.ipynb b/docs/source/tutorial/task.ipynb new file mode 100644 index 0000000000..b5d37bfedb --- /dev/null +++ b/docs/source/tutorial/task.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Designing tasks" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "funcOutputs(out=2.0)\n" + ] + } + ], + "source": [ + "from pydra.design import python\n", + "\n", + "\n", + "def func(a: int) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * 2\n", + "\n", + "SampleSpec = python.define(func)\n", + "\n", + "spec = SampleSpec(a=1)\n", + "result = spec()\n", + "print(result.output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With typing" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def func(a: int, k: float = 2.0) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * k\n", + "\n", + "SampleSpec = python.define(func)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Augment with explicit inputs and outputs\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from decimal import Decimal\n", + "\n", + "def func(a: int) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * 2\n", + "\n", + "SampleSpec = python.define(\n", + " func,\n", + " inputs={\"a\": python.arg(help_string=\"The argument to be doubled\")},\n", + " outputs={\"b\": python.out(help_string=\"the doubled output\", type=Decimal)},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Decorated_function" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Note we use CamelCase as the function is translated to a class\n", + "\n", + 
"@python.define(outputs=[\"c\", \"d\"])\n", + "def SampleSpec(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a * b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Pull helps from docstring" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", + "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import list_fields\n", + "\n", + "@python.define(outputs=[\"c\", \"d\"])\n", + "def SampleSpec(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + "\n", + " Returns:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + " return a + b, a * b\n", + "\n", + "pprint(list_fields(SampleSpec))\n", + "pprint(list_fields(SampleSpec.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataclass form" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", + "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + ] + } + ], + "source": [ + "\n", + "@python.define\n", + "class SampleSpec:\n", + " \"\"\"Sample class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0\n", + "\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, 
b):\n", + " return a + b, a * b\n", + "\n", + "pprint(list_fields(SampleSpec))\n", + "pprint(list_fields(SampleSpec.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Canonical form (to work with static type-checking)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", + "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + ] + } + ], + "source": [ + "from pydra.engine.specs import PythonSpec\n", + "\n", + "@python.define\n", + "class SampleSpec(PythonSpec[\"SampleSpec.Outputs\"]):\n", + " \"\"\"Sample class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float\n", + "\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a * b\n", + "\n", + "pprint(list_fields(SampleSpec))\n", + "pprint(list_fields(SampleSpec.Outputs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/tutorial/workflow.ipynb b/docs/source/tutorial/workflow.ipynb new file mode 100644 index 0000000000..552dce7c1c --- /dev/null +++ b/docs/source/tutorial/workflow.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Designing Workflows" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 24ada3c366..46cf85c08f 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -19,8 +19,8 @@ def check_latest_version(): # Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL -if not hasattr(__main__, "__file__"): - from .engine.core import TaskBase +# if not hasattr(__main__, "__file__"): +# from pydra.engine.core import TaskBase - if TaskBase._etelemetry_version_data is 
None: - TaskBase._etelemetry_version_data = check_latest_version() +# if TaskBase._etelemetry_version_data is None: +# TaskBase._etelemetry_version_data = check_latest_version() diff --git a/pyproject.toml b/pyproject.toml index cfe924a9f5..49bb7eaf1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,9 +47,11 @@ dev = ["black", "pre-commit", "pydra[test]"] doc = [ "packaging", "sphinx ==6.2.1", - "sphinx_rtd_theme", + "nbsphinx", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", + "furo>=2022.2.14.1", + "numpydoc>=0.6.0", ] test = [ "pytest >=6.2.5", From 0077f68dfb22d7b724785e01c8da8e0e8e193056 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 21:28:15 +1100 Subject: [PATCH 073/342] added simple installation instructions --- docs/source/howto/install.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/source/howto/install.rst b/docs/source/howto/install.rst index 80c058b0bd..9bf6f470ec 100644 --- a/docs/source/howto/install.rst +++ b/docs/source/howto/install.rst @@ -1,4 +1,8 @@ Installation ============ -Work in progress.... +Pydra can be installed using pip: + +.. code-block:: bash + + $ pip install pydra From c79905cf3f1e2c0eba8bf192e63b557d1976d53b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 22:10:14 +1100 Subject: [PATCH 074/342] touched up new docs --- docs/source/explanation/hashing-caching.rst | 172 -------------------- docs/source/explanation/typing.rst | 4 + docs/source/index.rst | 1 + docs/source/reference/api.rst | 20 ++- docs/source/tutorial/execution.ipynb | 24 ++- 5 files changed, 45 insertions(+), 176 deletions(-) create mode 100644 docs/source/explanation/typing.rst diff --git a/docs/source/explanation/hashing-caching.rst b/docs/source/explanation/hashing-caching.rst index 4945a158f9..19a928a040 100644 --- a/docs/source/explanation/hashing-caching.rst +++ b/docs/source/explanation/hashing-caching.rst @@ -2,175 +2,3 @@ Hashing and Caching =================== Work in progress.... 
- - - - - - - -## Overrides - - - - -## wrap_function_types - -def func(a: int) -> int: - """Sample function with inputs and outputs""" - return a * 2 - -SampleSpec = python.define( - func, - inputs={"a": float}, - outputs={"b": float}, -) - - - - - - - -## implicit_outputs_from_return_stmt - -@python.define -def SampleSpec(a: int, b: float) -> tuple[float, float]: - """Sample function for testing""" - c = a + b - d = a * b - return c, d - - -## Function docstr - - -@python.define(outputs=["c", "d"]) -def SampleSpec(a: int, b: float) -> tuple[float, float]: - """Sample function for testing - - :param a: First input to be inputted - :param b: Second input - :return c: Sum of a and b - :return d: product of a and b - """ - return a + b, a * b - -@python.define(outputs=["c", "d"]) -def SampleSpec(a: int, b: float) -> tuple[float, float]: - """Sample function for testing - - Args: - a: First input - to be inputted - b: Second input - - Returns: - c: Sum of a and b - d: Product of a and b - """ - return a + b, a * b - -@python.define( - outputs=["c", "d"] -) # Could potentiall read output names from doc-string instead -def SampleSpec(a: int, b: float) -> tuple[float, float]: - """Sample function for testing - - Parameters - ---------- - a: int - First input - to be inputted - b: float - Second input - - Returns - ------- - c : int - Sum of a and b - d : float - Product of a and b - """ - return a + b, a * b - - -## Canonical (dataclass-style) form - -@python.define -class SampleSpec: - """Sample class for testing - - Args: - a: First input - to be inputted - b: Second input - """ - - a: int - b: float = 2.0 - - class Outputs: - """ - Args: - c: Sum of a and b - d: Product of a and b - """ - - c: float - d: float - - @staticmethod - def function(a, b): - return a + b, a * b - - - -## With inheritance - -@python.define -class SampleSpec(PythonSpec["SampleSpec.Outputs"]): - """Sample class for testing - - Args: - a: First input - to be inputted - b: Second input - """ - - a: int - b: float - - class Outputs: - """ - Args: - c: Sum of a and b - d: Product of a and b - """ - - c: float - d: float - - @staticmethod - def function(a, b): - return a + b, a * b - - -## no_auto_attribs - -@python.define(auto_attribs=False) -class SampleSpec: - a: int = python.arg(help_string="First input to be inputted") - b: float = python.arg(help_string="Second input") - - x: int - - class Outputs: - c: float = python.out(help_string="Sum of a and b") - d: float = python.out(help_string="Product of a and b") - - y: str - - @staticmethod - def function(a, b): - return a + b, a * b diff --git a/docs/source/explanation/typing.rst b/docs/source/explanation/typing.rst new file mode 100644 index 0000000000..170cc1f75f --- /dev/null +++ b/docs/source/explanation/typing.rst @@ -0,0 +1,4 @@ +Strong typing +============= + +Work in progress... diff --git a/docs/source/index.rst b/docs/source/index.rst index 7fa5264fb6..44d9913a9b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -104,6 +104,7 @@ Indices and tables :caption: Explanation :hidden: + explanation/typing explanation/hashing-caching explanation/provenance explanation/lazy-evaluation diff --git a/docs/source/reference/api.rst b/docs/source/reference/api.rst index fed18c54ec..8e374efb51 100644 --- a/docs/source/reference/api.rst +++ b/docs/source/reference/api.rst @@ -1,26 +1,42 @@ -Library API (application programmer interface) -============================================== +API +=== + +Python tasks +------------ .. 
automodule:: pydra.design.python :members: :undoc-members: :show-inheritance: +Shell tasks +----------- + .. automodule:: pydra.design.shell :members: :undoc-members: :show-inheritance: +Workflows +--------- + .. automodule:: pydra.design.workflow :members: :undoc-members: :show-inheritance: +Specification classes +--------------------- + .. automodule:: pydra.engine.specs :members: :undoc-members: :show-inheritance: + +Task classes +------------ + .. automodule:: pydra.engine.task :members: :undoc-members: diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb index 16d1865558..487459c1b2 100644 --- a/docs/source/tutorial/execution.ipynb +++ b/docs/source/tutorial/execution.ipynb @@ -4,7 +4,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Executing tasks" + "# Running tasks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#from pydra.tasks.common import LoadJson\n", + "\n", + "#load_json = LoadJson(file=\"/path/to/my/file.json\")\n", + "#result = load_json(plugin=\"serial\")\n", + "\n", + "#print(result.output)" ] }, { @@ -14,8 +28,14 @@ } ], "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "name": "python", + "version": "3.12.5" } }, "nbformat": 4, From 8c08f9e51dc9a54faf9634ff1971780a70f67d25 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 16 Dec 2024 22:25:05 +1100 Subject: [PATCH 075/342] more docs restructuring --- docs/source/conf.py | 8 ++++---- docs/source/explanation/conditional-logic.rst | 4 ++++ docs/source/explanation/hashing-caching.rst | 2 +- docs/source/explanation/lazy-evaluation.rst | 4 ++-- docs/source/howto/create-task-package.rst | 2 +- docs/source/index.rst | 11 ++++++++++- docs/source/tutorial/shell.ipynb | 2 +- docs/source/tutorial/workflow.ipynb | 2 +- 8 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 docs/source/explanation/conditional-logic.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 2abd3c8c2f..a82719036d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -135,12 +135,12 @@ # documentation. html_theme_options = { "light_css_variables": { - "color-brand-primary": "#0e691b", - "color-brand-content": "#0e691b", + "color-brand-primary": "#69306d", + "color-brand-content": "#69306d", }, "dark_css_variables": { - "color-brand-primary": "#5db754", - "color-brand-content": "#5db754", + "color-brand-primary": "#ce8dcf", + "color-brand-content": "#ce8dcf", }, } diff --git a/docs/source/explanation/conditional-logic.rst b/docs/source/explanation/conditional-logic.rst new file mode 100644 index 0000000000..3ed2c600ed --- /dev/null +++ b/docs/source/explanation/conditional-logic.rst @@ -0,0 +1,4 @@ +Conditional construction +======================== + +Work in progress... diff --git a/docs/source/explanation/hashing-caching.rst b/docs/source/explanation/hashing-caching.rst index 19a928a040..44e4189b73 100644 --- a/docs/source/explanation/hashing-caching.rst +++ b/docs/source/explanation/hashing-caching.rst @@ -1,4 +1,4 @@ -Hashing and Caching +Hashing and caching =================== Work in progress.... 
diff --git a/docs/source/explanation/lazy-evaluation.rst b/docs/source/explanation/lazy-evaluation.rst index 0d2b45fd72..4ad81b1111 100644 --- a/docs/source/explanation/lazy-evaluation.rst +++ b/docs/source/explanation/lazy-evaluation.rst @@ -1,4 +1,4 @@ -Lazy Execution -============== +Lazy evaluation +=============== Work in progress.... diff --git a/docs/source/howto/create-task-package.rst b/docs/source/howto/create-task-package.rst index 25cf3ccbbe..3170372bb0 100644 --- a/docs/source/howto/create-task-package.rst +++ b/docs/source/howto/create-task-package.rst @@ -1,4 +1,4 @@ -Creating a Task Package +Creating a task package ======================= Work in progress.... diff --git a/docs/source/index.rst b/docs/source/index.rst index 44d9913a9b..f480c87808 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -7,6 +7,9 @@ Pydra is a new lightweight dataflow engine written in Python. Pydra is developed as an open-source project in the neuroimaging community, but it is designed as a general-purpose dataflow engine to support any scientific domain. +Rationale +--------- + Scientific workflows often require sophisticated analyses that encompass a large collection of algorithms. The algorithms, that were originally not necessarily designed to work together, @@ -20,6 +23,9 @@ Consistency, reproducibility and scalability demand scientific workflows to be organized into fully automated pipelines. This was the motivation behind Pydra - a new dataflow engine written in Python. +History +------- + The Pydra package is a part of the second generation of the Nipype_ ecosystem --- an open-source framework that provides a uniform interface to existing neuroimaging software and facilitates interaction between different software components. @@ -33,6 +39,9 @@ and is being developed with reproducibility, ease of use, and scalability in min Pydra itself is a standalone project and is designed as a general-purpose dataflow engine to support any scientific domain. +Design goals +------------ + The goal of Pydra is to provide a lightweight dataflow engine for computational graph construction, manipulation, and distributed execution, as well as ensuring reproducibility of scientific pipelines. In Pydra, a dataflow is represented as a directed acyclic graph, where each node represents a Python @@ -77,7 +86,6 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` -* :ref:`search` .. 
toctree:: @@ -104,6 +112,7 @@ Indices and tables :caption: Explanation :hidden: + explanation/conditional-logic explanation/typing explanation/hashing-caching explanation/provenance diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index ba74a9c786..41451903a7 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Designing shell tasks" + "# Shell tasks" ] }, { diff --git a/docs/source/tutorial/workflow.ipynb b/docs/source/tutorial/workflow.ipynb index 552dce7c1c..e98701a7d6 100644 --- a/docs/source/tutorial/workflow.ipynb +++ b/docs/source/tutorial/workflow.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Designing Workflows" + "# Workflows" ] }, { From 718e42af8ee8082ecf22d37d589741ffb65657e4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 18 Dec 2024 18:46:13 +1100 Subject: [PATCH 076/342] reworked new docs structure --- docs/source/explanation/conditional-lazy.rst | 4 + docs/source/explanation/conditional-logic.rst | 4 - docs/source/explanation/lazy-evaluation.rst | 4 - .../splitting-combining.rst} | 2 +- docs/source/howto/create-task-package.ipynb | 23 +++++ docs/source/howto/install.ipynb | 93 +++++++++++++++++++ docs/source/howto/install.rst | 8 -- docs/source/howto/port-from-nipype.ipynb | 23 +++++ docs/source/howto/port-from-nipype.rst | 4 - docs/source/index.rst | 5 +- pydra/engine/workflow/base.py | 5 + 11 files changed, 152 insertions(+), 23 deletions(-) create mode 100644 docs/source/explanation/conditional-lazy.rst delete mode 100644 docs/source/explanation/conditional-logic.rst delete mode 100644 docs/source/explanation/lazy-evaluation.rst rename docs/source/{howto/create-task-package.rst => explanation/splitting-combining.rst} (65%) create mode 100644 docs/source/howto/create-task-package.ipynb create mode 100644 docs/source/howto/install.ipynb delete mode 100644 docs/source/howto/install.rst create mode 100644 docs/source/howto/port-from-nipype.ipynb delete mode 100644 docs/source/howto/port-from-nipype.rst diff --git a/docs/source/explanation/conditional-lazy.rst b/docs/source/explanation/conditional-lazy.rst new file mode 100644 index 0000000000..2ddb9be34c --- /dev/null +++ b/docs/source/explanation/conditional-lazy.rst @@ -0,0 +1,4 @@ +Conditional construction and lazy evaluation +===================================================== + +Work in progress... diff --git a/docs/source/explanation/conditional-logic.rst b/docs/source/explanation/conditional-logic.rst deleted file mode 100644 index 3ed2c600ed..0000000000 --- a/docs/source/explanation/conditional-logic.rst +++ /dev/null @@ -1,4 +0,0 @@ -Conditional construction -======================== - -Work in progress... diff --git a/docs/source/explanation/lazy-evaluation.rst b/docs/source/explanation/lazy-evaluation.rst deleted file mode 100644 index 4ad81b1111..0000000000 --- a/docs/source/explanation/lazy-evaluation.rst +++ /dev/null @@ -1,4 +0,0 @@ -Lazy evaluation -=============== - -Work in progress.... 
diff --git a/docs/source/howto/create-task-package.rst b/docs/source/explanation/splitting-combining.rst similarity index 65% rename from docs/source/howto/create-task-package.rst rename to docs/source/explanation/splitting-combining.rst index 3170372bb0..08991c6ea7 100644 --- a/docs/source/howto/create-task-package.rst +++ b/docs/source/explanation/splitting-combining.rst @@ -1,4 +1,4 @@ -Creating a task package +Splitting and combining ======================= Work in progress.... diff --git a/docs/source/howto/create-task-package.ipynb b/docs/source/howto/create-task-package.ipynb new file mode 100644 index 0000000000..6479dbc78f --- /dev/null +++ b/docs/source/howto/create-task-package.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating a task package" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/howto/install.ipynb b/docs/source/howto/install.ipynb new file mode 100644 index 0000000000..6a0887c1c2 --- /dev/null +++ b/docs/source/howto/install.ipynb @@ -0,0 +1,93 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Installation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pydra is a pure Python package, which only depends on two packages `attrs` and `fileformats`. Therefore, it is straightforward to install via pip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "pip install pydra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To install a development version use the `dev` install option" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "zsh:1: no matches found: pydra[dev]\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "git clone git@github.com:nipype/pydra.git\n", + "pip install -e ./pydra[dev]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/howto/install.rst b/docs/source/howto/install.rst deleted file mode 100644 index 9bf6f470ec..0000000000 --- a/docs/source/howto/install.rst +++ /dev/null @@ -1,8 +0,0 @@ -Installation -============ - -Pydra can be installed using pip: - -.. 
code-block:: bash - - $ pip install pydra diff --git a/docs/source/howto/port-from-nipype.ipynb b/docs/source/howto/port-from-nipype.ipynb new file mode 100644 index 0000000000..3f673cd56e --- /dev/null +++ b/docs/source/howto/port-from-nipype.ipynb @@ -0,0 +1,23 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Port interfaces from Nipype" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/source/howto/port-from-nipype.rst b/docs/source/howto/port-from-nipype.rst deleted file mode 100644 index 31a1e3ed0e..0000000000 --- a/docs/source/howto/port-from-nipype.rst +++ /dev/null @@ -1,4 +0,0 @@ -Porting tasks from Nipype -========================= - -Work in progress.... diff --git a/docs/source/index.rst b/docs/source/index.rst index f480c87808..6d84dfe15e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -112,11 +112,12 @@ Indices and tables :caption: Explanation :hidden: - explanation/conditional-logic + explanation/splitting-combining explanation/typing explanation/hashing-caching + explanation/conditional-lazy explanation/provenance - explanation/lazy-evaluation + .. toctree:: :maxdepth: 2 diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index b3aa001714..56c3447e6c 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -124,6 +124,11 @@ def construct( return wf + @classmethod + def clear_cache(cls): + """Clear the cache of constructed workflows""" + cls._constructed.clear() + def add(self, task_spec: TaskSpec[OutputsType], name=None) -> OutputsType: """Add a node to the workflow From 8e1d83ce724d94de714628b12129316634c97ff5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 19 Dec 2024 11:20:24 +1100 Subject: [PATCH 077/342] Update pydra/engine/tests/test_boutiques.py Co-authored-by: Chris Markiewicz --- pydra/engine/tests/test_boutiques.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index d56f6d5809..ba2b1af117 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -95,6 +95,7 @@ def test_boutiques_spec_2(data_tests_dir): def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): """wf with one task that runs fsl.bet using BoshTask""" + @workflow.define def Workflow(maskfile, infile): bet = workflow.add( boutiques.define(zenodo_id="1482743")( From c3f824943f4e7e08551de811b2f282715187d0f3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 19 Dec 2024 15:54:43 +1100 Subject: [PATCH 078/342] touching up docs structure --- docs/source/explanation/conditional-lazy.rst | 4 +- .../explanation/splitting-combining.rst | 87 +++- docs/source/explanation/typing.rst | 2 +- docs/source/tutorial/execution.ipynb | 51 +- docs/source/tutorial/shell.ipynb | 2 +- docs/source/tutorial/task.ipynb | 2 +- docs/source/tutorial/workflow.ipynb | 2 +- .../{mark => engine}/tests/test_functions.py | 91 ++-- pydra/engine/tests/test_node_task.py | 4 +- pydra/engine/tests/test_numpy_examples.py | 5 +- pydra/mark/__init__.py | 4 - pydra/mark/functions.py | 49 -- pydra/mark/shell.py | 409 --------------- pydra/mark/tests/__init__.py | 0 pydra/mark/tests/test_shell.py | 467 ------------------ pydra/tasks/__init__.py | 11 - pydra/tasks/common/__init__.py | 7 + 17 files changed, 196 insertions(+), 1001 deletions(-) rename pydra/{mark => 
engine}/tests/test_functions.py (72%)
 delete mode 100644 pydra/mark/__init__.py
 delete mode 100644 pydra/mark/functions.py
 delete mode 100644 pydra/mark/shell.py
 delete mode 100644 pydra/mark/tests/__init__.py
 delete mode 100644 pydra/mark/tests/test_shell.py
 delete mode 100644 pydra/tasks/__init__.py
 create mode 100644 pydra/tasks/common/__init__.py

diff --git a/docs/source/explanation/conditional-lazy.rst b/docs/source/explanation/conditional-lazy.rst
index 2ddb9be34c..0c30be1d1c 100644
--- a/docs/source/explanation/conditional-lazy.rst
+++ b/docs/source/explanation/conditional-lazy.rst
@@ -1,4 +1,4 @@
-Conditional construction and lazy evaluation
-=====================================================
+Conditionals and lazy fields
+============================
 
 Work in progress...
diff --git a/docs/source/explanation/splitting-combining.rst b/docs/source/explanation/splitting-combining.rst
index 08991c6ea7..906a51443c 100644
--- a/docs/source/explanation/splitting-combining.rst
+++ b/docs/source/explanation/splitting-combining.rst
@@ -1,4 +1,89 @@
 Splitting and combining
 =======================
 
-Work in progress....
+One of the main goals of creating Pydra was to support flexible evaluation of a Task or a Workflow
+over combinations of input parameters.
+This is the key feature that distinguishes it from most other dataflow engines.
+This is similar to the concept of the Map-Reduce_, but extends it to work over arbitrary nested graphs.
+In complex dataflows, this would typically involve significant overhead for data management
+and use of multiple nested loops.
+In Pydra, this is controlled by setting specific State-related attributes through Task methods.
+In order to set input splitting (or mapping), Pydra requires setting up a splitter.
+This is done using the Task's split method.
+The simplest example would be a Task that has one field x in the input, and therefore there
+is only one way of splitting its input.
+Assuming that the user provides a list as a value of x, Pydra splits the list, so each copy
+of the Task will get one element of the list.
+This can be represented as follows:
+
+.. math::
+
+    S = x: x=[x_1, x_2, ..., x_n] \longmapsto x=x_1, x=x_2, ..., x=x_n~,
+
+where S represents the splitter, and x is the input field.
+This is also represented in the diagram, where :math:`x=[1, 2, 3]` is used as an example, and the coloured
+nodes represent stateless copies of the original Task after splitting the input
+(these are the runnables that are executed).
+
+.. figure:: ../_static/images/nd_spl_1.png
+   :figclass: h!
+   :scale: 50 %
+
+Types of Splitter
+-----------------
+Whenever a *Task* has more complicated inputs,
+i.e. multiple fields, there are two ways of creating the mapping,
+each one used for a different application.
+These *splitters* are called *scalar splitter* and *outer splitter*.
+They use a special, but Python-based syntax as described next.
+
+Scalar Splitter
+---------------
+A *scalar splitter* performs element-wise mapping and requires that the lists of
+values for two or more fields have the same length. The *scalar splitter* uses
+Python tuples and its operation is therefore represented by parentheses, ``()``:
+
+.. math::
+
+    S = (x, y) : x=[x_1, x_2, .., x_n],~y=[y_1, y_2, .., y_n] \mapsto (x, y)=(x_1, y_1),..., (x, y)=(x_n, y_n),
+
+
+where `S` represents the *splitter*, `x` and `y` are the input fields.
+This is also represented as a diagram:
+
+.. figure:: ../_static/images/nd_spl_4.png
+   :figclass: h!
+   :scale: 80%
+
+
+Outer Splitter
+--------------
+
+The second option of mapping the input, when there are multiple fields, is
+provided by the *outer splitter*. The *outer splitter* creates all combinations
+of the input values and does not require the lists to have the same length.
+The *outer splitter* uses Python's list syntax and is represented by square
+brackets, ``[]``:
+
+.. math::
+
+    S = [x, y] &:& x=[x_1, x_2, ..., x_n],~~ y=[y_1, y_2, ..., y_m], \\
+    &\mapsto& (x, y)=(x_1, y_1), (x, y)=(x_1, y_2)..., (x, y)=(x_n, y_m).
+
+
+The *outer splitter* for a node with two input fields is schematically represented in the diagram:
+
+.. figure:: ../_static/images/nd_spl_3.png
+   :figclass: h!
+   :scale: 80%
+
+
+Different types of splitters can be combined over inputs such as
+`[inp1, (inp2, inp3)]`. In this example, an *outer splitter* provides all
+combinations of values of `inp1` with pairwise combinations of values of `inp2`
+and `inp3`. This can be extended to arbitrary complexity.
+In addition, the outputs can be merged at the end if needed.
+This will be explained in the next section.
+
+
+.. _Map-Reduce: https://en.wikipedia.org/wiki/MapReduce
diff --git a/docs/source/explanation/typing.rst b/docs/source/explanation/typing.rst
index 170cc1f75f..c543c966cd 100644
--- a/docs/source/explanation/typing.rst
+++ b/docs/source/explanation/typing.rst
@@ -1,4 +1,4 @@
-Strong typing
+Type checking
 =============
 
 Work in progress...
diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb
index 487459c1b2..64aebe9249 100644
--- a/docs/source/tutorial/execution.ipynb
+++ b/docs/source/tutorial/execution.ipynb
@@ -4,21 +4,50 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Running tasks"
+    "# Executing tasks\n",
+    "\n",
+    "A *Task* is the basic runnable component in Pydra, and can execute either a Python function,\n",
+    "shell command or workflows consisting of combinations of all three types."
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pydra.mark'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapplication\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Json\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LoadJson\n\u001b[0;32m----> 4\u001b[0m load_json \u001b[38;5;241m=\u001b[39m LoadJson(file\u001b[38;5;241m=\u001b[39m\u001b[43mJson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/path/to/my/file.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 5\u001b[0m result \u001b[38;5;241m=\u001b[39m load_json(plugin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserial\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(result\u001b[38;5;241m.\u001b[39moutput)\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1050\u001b[0m, in \u001b[0;36mFileSet.sample\u001b[0;34m(cls, dest_dir, seed, stem)\u001b[0m\n\u001b[1;32m 1047\u001b[0m dest_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n\u001b[1;32m 1048\u001b[0m \u001b[38;5;66;03m# Need to use mock to get an instance in order to use the singledispatch-based\u001b[39;00m\n\u001b[1;32m 1049\u001b[0m \u001b[38;5;66;03m# extra decorator\u001b[39;00m\n\u001b[0;32m-> 1050\u001b[0m fspaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43mSampleFileGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfname_stem\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1054\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(fspaths)\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1083\u001b[0m, in \u001b[0;36mFileSet.sample_data\u001b[0;34m(cls, generator)\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Converts the `generate_sample_data` method into a class method by mocking up\u001b[39;00m\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;124;03ma class instance and calling the method on it\u001b[39;00m\n\u001b[1;32m 1071\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1080\u001b[0m 
\u001b[38;5;124;03m the generated file-system paths\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1082\u001b[0m mock: FileSet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmock()\n\u001b[0;32m-> 1083\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_sample_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/extras.py:38\u001b[0m, in \u001b[0;36mextra..decorated\u001b[0;34m(obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m extras \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tp \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mreferenced_types(): \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m extras\u001b[38;5;241m.\u001b[39mappend(\u001b[43mimport_extras_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtp\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dispatch_method(obj, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/utils.py:230\u001b[0m, in \u001b[0;36mimport_extras_module\u001b[0;34m(klass)\u001b[0m\n\u001b[1;32m 228\u001b[0m extras_pypi \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfileformats-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msub_pkg\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-extras\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 230\u001b[0m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras_pkg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo module named \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mextras_pkg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n", + "File \u001b[0;32m:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", + "File \u001b[0;32m:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", + "File \u001b[0;32m:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", + "File \u001b[0;32m:995\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", + "File \u001b[0;32m:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n", + "File \u001b[0;32m~/git/workflows/fileformats/extras/fileformats/extras/application/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m archive\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m serialization\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m medical\n", + "File \u001b[0;32m~/git/workflows/fileformats/extras/fileformats/extras/application/archive.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mattrs\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmark\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecs\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FsObject\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pydra.mark'" + ] + } + ], "source": [ - "#from pydra.tasks.common import LoadJson\n", + "from fileformats.application import Json\n", + "from pydra.tasks.common import LoadJson\n", "\n", - "#load_json = LoadJson(file=\"/path/to/my/file.json\")\n", - "#result = load_json(plugin=\"serial\")\n", + "load_json = LoadJson(file=Json.sample(\"/path/to/my/file.json\"))\n", + "result = load_json(plugin=\"serial\")\n", "\n", - "#print(result.output)" + "print(result.output)" ] }, { @@ -34,7 +63,15 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", "version": "3.12.5" } }, diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index 41451903a7..1f280a152f 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Shell tasks" + "# Shell task design" ] }, { diff --git a/docs/source/tutorial/task.ipynb b/docs/source/tutorial/task.ipynb index 
b5d37bfedb..f771ee618f 100644 --- a/docs/source/tutorial/task.ipynb +++ b/docs/source/tutorial/task.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Designing tasks" + "# Task design" ] }, { diff --git a/docs/source/tutorial/workflow.ipynb b/docs/source/tutorial/workflow.ipynb index e98701a7d6..5538297961 100644 --- a/docs/source/tutorial/workflow.ipynb +++ b/docs/source/tutorial/workflow.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Workflows" + "# Workflow design" ] }, { diff --git a/pydra/mark/tests/test_functions.py b/pydra/engine/tests/test_functions.py similarity index 72% rename from pydra/mark/tests/test_functions.py rename to pydra/engine/tests/test_functions.py index e124c518dc..ee49c6d269 100644 --- a/pydra/mark/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -2,19 +2,31 @@ import random import typing as ty -from ..functions import task, annotate -from pydra.engine.task import PythonTask +from pydra.design import python +from pydra.engine.helpers import list_fields def test_task_equivalence(): def add_two(a): return a + 2 - canonical = PythonTask(add_two, a=3) + @python.define + class Canonical: - decorated1 = task(add_two)(a=3) + a: ty.Any - @task + class Outputs: + out: ty.Any + + @staticmethod + def function(a): + return a + 2 + + canonical = Canonical(a=3) + + decorated1 = python.define(add_two)(a=3) + + @python.define def addtwo(a): return a + 2 @@ -36,11 +48,11 @@ def test_annotation_equivalence_1(): def direct(a: int) -> int: return a + 2 - @annotate({"return": int}) + @python.define(outputs={"out": int}) def partial(a: int): return a + 2 - @annotate({"a": int, "return": int}) + @python.define(inputs={"a": int}, outputs={"return": int}) def indirect(a): return a + 2 @@ -54,8 +66,8 @@ def indirect(a): assert direct(a) == indirect(a) # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields[0] == ("out", int) + task_direct = python.define(direct)() + assert list_fields(task_direct.Outputs)[0] == python.out(name="out", type=int) def test_annotation_equivalence_2(): @@ -64,12 +76,12 @@ def test_annotation_equivalence_2(): def direct(a: int) -> (int, float): return a + 2, a + 2.0 - @annotate({"return": (int, float)}) + @python.define(outputs={"out": (int, float)}) def partial(a: int): return a + 2, a + 2.0 - @annotate({"a": int, "return": (int, float)}) - def indirect(a): + @python.define(inputs={"a": int}) + def indirect(a) -> tuple[int, float]: return a + 2, a + 2.0 # checking if the annotations are equivalent @@ -82,21 +94,22 @@ def indirect(a): assert direct(a) == indirect(a) # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() + task_direct = python.define(direct)() assert task_direct.output_spec.fields == [("out1", int), ("out2", float)] def test_annotation_equivalence_3(): """testing various ways of annotation: using dictionary for output annot.""" - def direct(a: int) -> {"out1": int}: + @python.define(outputs=["out1"]) + def direct(a: int) -> int: return a + 2 - @annotate({"return": {"out1": int}}) + @python.define(inputs={"return": {"out1": int}}) def partial(a: int): return a + 2 - @annotate({"a": int, "return": {"out1": int}}) + @python.define(inputs={"a": int}, outputs={"out1": int}) def indirect(a): return a + 2 @@ -110,23 +123,22 @@ def indirect(a): assert direct(a) == indirect(a) # checking if the annotation is properly 
converted to output_spec if used in task - task_direct = task(direct)() + task_direct = python.define(direct)() assert task_direct.output_spec.fields[0] == ("out1", int) def test_annotation_equivalence_4(): """testing various ways of annotation: using ty.NamedTuple for the output""" - def direct(a: int) -> ty.NamedTuple("Output", [("sum", int), ("sub", int)]): + @python.define(outputs=["sum", "sub"]) + def direct(a: int) -> tuple[int, int]: return a + 2, a - 2 - @annotate({"return": ty.NamedTuple("Output", [("sum", int), ("sub", int)])}) + @python.define(outputs={"sum": int, "sub": int}) def partial(a: int): return a + 2, a - 2 - @annotate( - {"a": int, "return": ty.NamedTuple("Output", [("sum", int), ("sub", int)])} - ) + @python.define(inputs={"a": int}, outputs={"sum": int, "sub": int}) def indirect(a): return a + 2, a - 2 @@ -148,28 +160,23 @@ def indirect(a): assert direct(a) == indirect(a) # checking if the annotation is properly converted to output_spec if used in task - task_direct = task(direct)() - assert task_direct.output_spec.fields == [("sum", int), ("sub", int)] - - -def test_annotation_override(): - @annotate({"a": float, "return": float}) - def annotated(a: int) -> int: - return a + 2 - - assert annotated.__annotations__ == {"a": float, "return": float} + task_direct = python.define(direct)() + assert list_fields(task_direct.Outputs) == [ + python.arg(name="sum", type=int), + python.arg(name="sub", type=int), + ] def test_invalid_annotation(): with pytest.raises(TypeError): - @annotate({"b": int}) + @python.define(inputs={"b": int}) def addtwo(a): return a + 2 def test_annotated_task(): - @task + def square(in_val: float): return in_val**2 @@ -178,8 +185,8 @@ def square(in_val: float): def test_return_annotated_task(): - @task - @annotate({"in_val": float, "return": {"squared": float}}) + + @python.define(inputs={"in_val": float}, outputs={"squared": float}) def square(in_val): return in_val**2 @@ -188,8 +195,8 @@ def square(in_val): def test_return_halfannotated_annotated_task(): - @task - @annotate({"in_val": float, "return": float}) + + @python.define(inputs={"in_val": float}, outputs={"out": float}) def square(in_val): return in_val**2 @@ -198,8 +205,8 @@ def square(in_val): def test_return_annotated_task_multiple_output(): - @task - @annotate({"in_val": float, "return": {"squared": float, "cubed": float}}) + + @python.define(inputs={"in_val": float}, outputs={"squared": float, "cubed": float}) def square(in_val): return in_val**2, in_val**3 @@ -209,8 +216,8 @@ def square(in_val): def test_return_halfannotated_task_multiple_output(): - @task - @annotate({"in_val": float, "return": (float, float)}) + + @python.define(inputs={"in_val": float}, outputs=(float, float)) def square(in_val): return in_val**2, in_val**3 diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 154afa5534..475d4a0ec7 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -9,7 +9,7 @@ import pytest import time from fileformats.generic import File -import pydra.mark +from pydra.design import python from .utils import ( fun_addtwo, @@ -1643,7 +1643,7 @@ def test_task_files_persistentcache(tmp_path): cache_dir.mkdir() test_file = OverriddenContentsFile(test_file_path) - @pydra.mark.task + @python.define def read_contents(x: OverriddenContentsFile) -> bytes: return x.contents diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index e770011b9c..b0c998666a 100644 --- 
a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -7,7 +7,7 @@ from ..submitter import Submitter -from pydra.mark import task, annotate +from pydra.design import python, workflow from .utils import identity from pydra.utils.hash import hash_function @@ -15,8 +15,7 @@ pytest.skip("can't find numpy library", allow_module_level=True) -@task -@annotate({"return": {"b": ty.Any}}) +@python.define(outputs=["b"]) def arrayout(val): return np.array([val, val]) diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py deleted file mode 100644 index f2434e5a1c..0000000000 --- a/pydra/mark/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .functions import annotate, task -from .shell import shell_task, shell_arg, shell_out - -__all__ = ("annotate", "task", "shell_task", "shell_arg", "shell_out") diff --git a/pydra/mark/functions.py b/pydra/mark/functions.py deleted file mode 100644 index c8b45fc265..0000000000 --- a/pydra/mark/functions.py +++ /dev/null @@ -1,49 +0,0 @@ -""" Decorators to apply to functions used in Pydra workflows """ - -from functools import wraps - - -def annotate(annotation): - """ - Update the annotation of a function. - - Example - ------- - >>> import pydra - >>> @pydra.mark.annotate({'a': int, 'return': float}) - ... def square(a): - ... return a ** 2.0 - - """ - import inspect - - def decorate(func): - sig = inspect.signature(func) - unknown = set(annotation) - set(sig.parameters) - {"return"} - if unknown: - raise TypeError(f"Cannot annotate unknown parameters: {tuple(unknown)}") - func.__annotations__.update(annotation) - return func - - return decorate - - -def task(func): - """ - Promote a function to a :class:`~pydra.engine.task.PythonTask`. - - Example - ------- - >>> import pydra - >>> @pydra.mark.task - ... def square(a: int) -> float: - ... return a ** 2.0 - - """ - from pydra.engine.task import PythonTask - - @wraps(func) - def decorate(**kwargs): - return PythonTask(func=func, **kwargs) - - return decorate diff --git a/pydra/mark/shell.py b/pydra/mark/shell.py deleted file mode 100644 index 869a24362d..0000000000 --- a/pydra/mark/shell.py +++ /dev/null @@ -1,409 +0,0 @@ -"""Decorators and helper functions to create ShellTasks used in Pydra workflows""" - -from __future__ import annotations -import typing as ty -import attrs - -# import os -import pydra.engine.specs - - -def shell_task( - klass_or_name: ty.Union[type, str], - executable: ty.Optional[str] = None, - input_fields: ty.Optional[dict[str, dict]] = None, - output_fields: ty.Optional[dict[str, dict]] = None, - bases: ty.Optional[list[type]] = None, - inputs_bases: ty.Optional[list[type]] = None, - outputs_bases: ty.Optional[list[type]] = None, -) -> type: - """ - Construct an analysis class and validate all the components fit together - - Parameters - ---------- - klass_or_name : type or str - Either the class decorated by the @shell_task decorator or the name for a - dynamically generated class - executable : str, optional - If dynamically constructing a class (instead of decorating an existing one) the - name of the executable to run is provided - input_fields : dict[str, dict], optional - If dynamically constructing a class (instead of decorating an existing one) the - input fields can be provided as a dictionary of dictionaries, where the keys - are the name of the fields and the dictionary contents are passed as keyword - args to cmd_arg, with the exception of "type", which is used as the type annotation - of the field. 
- output_fields : dict[str, dict], optional - If dynamically constructing a class (instead of decorating an existing one) the - output fields can be provided as a dictionary of dictionaries, where the keys - are the name of the fields and the dictionary contents are passed as keyword - args to cmd_out, with the exception of "type", which is used as the type annotation - of the field. - bases : list[type] - Base classes for dynamically constructed shell command classes - inputs_bases : list[type] - Base classes for the input spec of dynamically constructed shell command classes - outputs_bases : list[type] - Base classes for the input spec of dynamically constructed shell command classes - - Returns - ------- - type - the shell command task class - """ - - annotations = { - "executable": str, - "Inputs": type, - "Outputs": type, - } - dct = {"__annotations__": annotations} - - if isinstance(klass_or_name, str): - # Dynamically created classes using shell_task as a function - name = klass_or_name - - if executable is not None: - dct["executable"] = executable - if input_fields is None: - input_fields = {} - if output_fields is None: - output_fields = {} - bases = list(bases) if bases is not None else [] - inputs_bases = list(inputs_bases) if inputs_bases is not None else [] - outputs_bases = list(outputs_bases) if outputs_bases is not None else [] - - # Ensure base classes included somewhere in MRO - def ensure_base_included(base_class: type, bases_list: list[type]): - if not any(issubclass(b, base_class) for b in bases_list): - bases_list.append(base_class) - - # Get inputs and outputs bases from base class if not explicitly provided - for base in bases: - if not inputs_bases: - try: - inputs_bases = [base.Inputs] - except AttributeError: - pass - if not outputs_bases: - try: - outputs_bases = [base.Outputs] - except AttributeError: - pass - - # Ensure bases are lists and can be modified - ensure_base_included(pydra.engine.task.ShellTask, bases) - ensure_base_included(pydra.engine.specs.ShellSpec, inputs_bases) - ensure_base_included(pydra.engine.specs.ShellOutputs, outputs_bases) - - def convert_to_attrs(fields: dict[str, dict[str, ty.Any]], attrs_func): - annotations = {} - attrs_dict = {"__annotations__": annotations} - for name, dct in fields.items(): - kwargs = dict(dct) # copy to avoid modifying input to outer function - annotations[name] = kwargs.pop("type") - attrs_dict[name] = attrs_func(**kwargs) - return attrs_dict - - Inputs = attrs.define(kw_only=True, slots=False)( - type( - "Inputs", - tuple(inputs_bases), - convert_to_attrs(input_fields, shell_arg), - ) - ) - - Outputs = attrs.define(kw_only=True, slots=False)( - type( - "Outputs", - tuple(outputs_bases), - convert_to_attrs(output_fields, shell_out), - ) - ) - - else: - # Statically defined classes using shell_task as decorator - if ( - executable, - input_fields, - output_fields, - bases, - inputs_bases, - outputs_bases, - ) != (None, None, None, None, None, None): - raise RuntimeError( - "When used as a decorator on a class, `shell_task` should not be " - "provided any other arguments" - ) - klass = klass_or_name - name = klass.__name__ - - bases = [klass] - if not issubclass(klass, pydra.engine.task.ShellTask): - bases.append(pydra.engine.task.ShellTask) - - try: - executable = klass.executable - except AttributeError: - raise RuntimeError( - "Classes decorated by `shell_task` should contain an `executable` " - "attribute specifying the shell tool to run" - ) - try: - Inputs = klass.Inputs - except AttributeError: - raise 
RuntimeError( - "Classes decorated by `shell_task` should contain an `Inputs` class " - "attribute specifying the inputs to the shell tool" - ) - - try: - Outputs = klass.Outputs - except AttributeError: - Outputs = type("Outputs", (pydra.engine.specs.ShellOutputs,), {}) - - # Pass Inputs and Outputs in attrs.define if they are present in klass (i.e. - # not in a base class) - if "Inputs" in klass.__dict__: - Inputs = attrs.define(kw_only=True, slots=False)(Inputs) - if "Outputs" in klass.__dict__: - Outputs = attrs.define(kw_only=True, slots=False)(Outputs) - - if not issubclass(Inputs, pydra.engine.specs.ShellSpec): - Inputs = attrs.define(kw_only=True, slots=False)( - type("Inputs", (Inputs, pydra.engine.specs.ShellSpec), {}) - ) - - template_fields = _gen_output_template_fields(Inputs, Outputs) - - if not issubclass(Outputs, pydra.engine.specs.ShellOutputs): - outputs_bases = (Outputs, pydra.engine.specs.ShellOutputs) - add_base_class = True - else: - outputs_bases = (Outputs,) - add_base_class = False - - if add_base_class or template_fields: - Outputs = attrs.define(kw_only=True, slots=False)( - type("Outputs", outputs_bases, template_fields) - ) - - dct["Inputs"] = Inputs - dct["Outputs"] = Outputs - - task_klass = type(name, tuple(bases), dct) - - if not hasattr(task_klass, "executable"): - raise RuntimeError( - "Classes generated by `shell_task` should contain an `executable` " - "attribute specifying the shell tool to run" - ) - - task_klass.input_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Inputs", fields=[], bases=(task_klass.Inputs,) - ) - task_klass.output_spec = pydra.engine.specs.SpecInfo( - name=f"{name}Outputs", fields=[], bases=(task_klass.Outputs,) - ) - - return task_klass - - -def shell_arg( - help_string: str, - default: ty.Any = attrs.NOTHING, - argstr: str = None, - position: int = None, - mandatory: bool = False, - sep: str = None, - allowed_values: list = None, - requires: list = None, - xor: list = None, - copyfile: bool = None, - container_path: bool = False, - output_file_template: str = None, - output_field_name: str = None, - keep_extension: bool = True, - readonly: bool = False, - formatter: ty.Callable = None, - **kwargs, -): - """ - Returns an attrs field with appropriate metadata for it to be added as an argument in - a Pydra shell command task definition - - Parameters - ------------ - help_string: str - A short description of the input field. - default : Any, optional - the default value for the argument - argstr: str, optional - A flag or string that is used in the command before the value, e.g. -v or - -v {inp_field}, but it could be and empty string, “”. If … are used, e.g. -v…, - the flag is used before every element if a list is provided as a value. If no - argstr is used the field is not part of the command. - position: int, optional - Position of the field in the command, could be nonnegative or negative integer. - If nothing is provided the field will be inserted between all fields with - nonnegative positions and fields with negative positions. - mandatory: bool, optional - If True user has to provide a value for the field, by default it is False - sep: str, optional - A separator if a list is provided as a value. - allowed_values: list, optional - List of allowed values for the field. - requires: list, optional - List of field names that are required together with the field. - xor: list, optional - List of field names that are mutually exclusive with the field. 
- copyfile: bool, optional - If True, a hard link is created for the input file in the output directory. If - hard link not possible, the file is copied to the output directory, by default - it is False - container_path: bool, optional - If True a path will be consider as a path inside the container (and not as a - local path, by default it is False - output_file_template: str, optional - If provided, the field is treated also as an output field and it is added to - the output spec. The template can use other fields, e.g. {file1}. Used in order - to create an output specification. - output_field_name: str, optional - If provided the field is added to the output spec with changed name. Used in - order to create an output specification. Used together with output_file_template - keep_extension: bool, optional - A flag that specifies if the file extension should be removed from the field value. - Used in order to create an output specification, by default it is True - readonly: bool, optional - If True the input field can’t be provided by the user but it aggregates other - input fields (for example the fields with argstr: -o {fldA} {fldB}), by default - it is False - formatter: function, optional - If provided the argstr of the field is created using the function. This function - can for example be used to combine several inputs into one command argument. The - function can take field (this input field will be passed to the function), - inputs (entire inputs will be passed) or any input field name (a specific input - field will be sent). - **kwargs - remaining keyword arguments are passed onto the underlying attrs.field function - """ - - metadata = { - "help_string": help_string, - "argstr": argstr, - "position": position, - "mandatory": mandatory, - "sep": sep, - "allowed_values": allowed_values, - "requires": requires, - "xor": xor, - "copyfile": copyfile, - "container_path": container_path, - "output_file_template": output_file_template, - "output_field_name": output_field_name, - "keep_extension": keep_extension, - "readonly": readonly, - "formatter": formatter, - } - - return attrs.field( - default=default, - metadata={k: v for k, v in metadata.items() if v is not None}, - **kwargs, - ) - - -def shell_out( - help_string: str, - mandatory: bool = False, - output_file_template: str = None, - output_field_name: str = None, - keep_extension: bool = True, - requires: list = None, - callable: ty.Callable = None, - **kwargs, -): - """Returns an attrs field with appropriate metadata for it to be added as an output of - a Pydra shell command task definition - - Parameters - ---------- - help_string: str - A short description of the input field. The same as in input_spec. - mandatory: bool, default: False - If True the output file has to exist, otherwise an error will be raised. - output_file_template: str, optional - If provided the output file name (or list of file names) is created using the - template. The template can use other fields, e.g. {file1}. The same as in - input_spec. - output_field_name: str, optional - If provided the field is added to the output spec with changed name. The same as - in input_spec. Used together with output_file_template - keep_extension: bool, default: True - A flag that specifies if the file extension should be removed from the field - value. The same as in input_spec. - requires: list - List of field names that are required to create a specific output. 
The fields - do not have to be a part of the output_file_template and if any field from the - list is not provided in the input, a NOTHING is returned for the specific output. - This has a different meaning than the requires form the input_spec. - callable: Callable - If provided the output file name (or list of file names) is created using the - function. The function can take field (the specific output field will be passed - to the function), output_dir (task output_dir will be used), stdout, stderr - (stdout and stderr of the task will be sent) inputs (entire inputs will be - passed) or any input field name (a specific input field will be sent). - **kwargs - remaining keyword arguments are passed onto the underlying attrs.field function - """ - metadata = { - "help_string": help_string, - "mandatory": mandatory, - "output_file_template": output_file_template, - "output_field_name": output_field_name, - "keep_extension": keep_extension, - "requires": requires, - "callable": callable, - } - - return attrs.field( - metadata={k: v for k, v in metadata.items() if v is not None}, **kwargs - ) - - -def _gen_output_template_fields(Inputs: type, Outputs: type) -> dict: - """Auto-generates output fields for inputs that specify an 'output_file_template' - - Parameters - ---------- - Inputs : type - Inputs specification class - Outputs : type - Outputs specification class - - Returns - ------- - template_fields: dict[str, attrs._make_CountingAttribute] - the template fields to add to the output spec - """ - annotations = {} - template_fields = {"__annotations__": annotations} - output_field_names = [f.name for f in attrs.fields(Outputs)] - for fld in attrs.fields(Inputs): - if "output_file_template" in fld.metadata: - if "output_field_name" in fld.metadata: - field_name = fld.metadata["output_field_name"] - else: - field_name = fld.name - # skip adding if the field already in the output_spec - exists_already = field_name in output_field_names - if not exists_already: - metadata = { - "help_string": fld.metadata["help_string"], - "mandatory": fld.metadata["mandatory"], - "keep_extension": fld.metadata["keep_extension"], - } - template_fields[field_name] = attrs.field(metadata=metadata) - annotations[field_name] = str - return template_fields diff --git a/pydra/mark/tests/__init__.py b/pydra/mark/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/pydra/mark/tests/test_shell.py b/pydra/mark/tests/test_shell.py deleted file mode 100644 index 6fee7259b1..0000000000 --- a/pydra/mark/tests/test_shell.py +++ /dev/null @@ -1,467 +0,0 @@ -import os -import tempfile -import attrs -from pathlib import Path -import pytest -import cloudpickle as cp -from pydra.mark import shell_task, shell_arg, shell_out - - -def list_entries(stdout): - return stdout.split("\n")[:-1] - - -@pytest.fixture -def tmpdir(): - return Path(tempfile.mkdtemp()) - - -@pytest.fixture(params=["static", "dynamic"]) -def Ls(request): - if request.param == "static": - - @shell_task - class Ls: - executable = "ls" - - class Inputs: - directory: os.PathLike = shell_arg( - help_string="the directory to list the contents of", - argstr="", - mandatory=True, - position=-1, - ) - hidden: bool = shell_arg( - help_string=("display hidden FS objects"), - argstr="-a", - default=False, - ) - long_format: bool = shell_arg( - help_string=( - "display properties of FS object, such as permissions, size and " - "timestamps " - ), - default=False, - argstr="-l", - ) - human_readable: bool = shell_arg( - help_string="display 
file sizes in human readable form", - argstr="-h", - default=False, - requires=["long_format"], - ) - complete_date: bool = shell_arg( - help_string="Show complete date in long format", - argstr="-T", - default=False, - requires=["long_format"], - xor=["date_format_str"], - ) - date_format_str: str = shell_arg( - help_string="format string for ", - argstr="-D", - default=attrs.NOTHING, - requires=["long_format"], - xor=["complete_date"], - ) - - class Outputs: - entries: list = shell_out( - help_string="list of entries returned by ls command", - callable=list_entries, - ) - - elif request.param == "dynamic": - Ls = shell_task( - "Ls", - executable="ls", - input_fields={ - "directory": { - "type": os.PathLike, - "help_string": "the directory to list the contents of", - "argstr": "", - "mandatory": True, - "position": -1, - }, - "hidden": { - "type": bool, - "help_string": "display hidden FS objects", - "argstr": "-a", - }, - "long_format": { - "type": bool, - "help_string": ( - "display properties of FS object, such as permissions, size and " - "timestamps " - ), - "argstr": "-l", - }, - "human_readable": { - "type": bool, - "help_string": "display file sizes in human readable form", - "argstr": "-h", - "requires": ["long_format"], - }, - "complete_date": { - "type": bool, - "help_string": "Show complete date in long format", - "argstr": "-T", - "requires": ["long_format"], - "xor": ["date_format_str"], - }, - "date_format_str": { - "type": str, - "help_string": "format string for ", - "argstr": "-D", - "requires": ["long_format"], - "xor": ["complete_date"], - }, - }, - output_fields={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } - }, - ) - - else: - assert False - - return Ls - - -def test_shell_fields(Ls): - assert [a.name for a in attrs.fields(Ls.Inputs)] == [ - "executable", - "args", - "directory", - "hidden", - "long_format", - "human_readable", - "complete_date", - "date_format_str", - ] - - assert [a.name for a in attrs.fields(Ls.Outputs)] == [ - "return_code", - "stdout", - "stderr", - "entries", - ] - - -def test_shell_pickle_roundtrip(Ls, tmpdir): - pkl_file = tmpdir / "ls.pkl" - with open(pkl_file, "wb") as f: - cp.dump(Ls, f) - - with open(pkl_file, "rb") as f: - RereadLs = cp.load(f) - - assert RereadLs is Ls - - -def test_shell_run(Ls, tmpdir): - Path.touch(tmpdir / "a") - Path.touch(tmpdir / "b") - Path.touch(tmpdir / "c") - - ls = Ls(directory=tmpdir, long_format=True) - - # Test cmdline - assert ls.inputs.directory == tmpdir - assert not ls.inputs.hidden - assert ls.inputs.long_format - assert ls.cmdline == f"ls -l {tmpdir}" - - # Drop Long format flag to make output simpler - ls = Ls(directory=tmpdir) - result = ls() - - assert result.output.entries == ["a", "b", "c"] - - -@pytest.fixture(params=["static", "dynamic"]) -def A(request): - if request.param == "static": - - @shell_task - class A: - executable = "cp" - - class Inputs: - x: os.PathLike = shell_arg( - help_string="an input file", argstr="", position=0 - ) - y: str = shell_arg( - help_string="path of output file", - output_file_template="{x}_out", - argstr="", - ) - - elif request.param == "dynamic": - A = shell_task( - "A", - executable="cp", - input_fields={ - "x": { - "type": os.PathLike, - "help_string": "an input file", - "argstr": "", - "position": 0, - }, - "y": { - "type": str, - "help_string": "path of output file", - "argstr": "", - "output_file_template": "{x}_out", - }, - }, - ) - else: - assert False - - return A - - -def 
test_shell_output_file_template(A): - assert "y" in [a.name for a in attrs.fields(A.Outputs)] - - -def test_shell_output_field_name_static(): - @shell_task - class A: - executable = "cp" - - class Inputs: - x: os.PathLike = shell_arg( - help_string="an input file", argstr="", position=0 - ) - y: str = shell_arg( - help_string="path of output file", - output_file_template="{x}_out", - output_field_name="y_out", - argstr="", - ) - - assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] - - -def test_shell_output_field_name_dynamic(): - A = shell_task( - "A", - executable="cp", - input_fields={ - "x": { - "type": os.PathLike, - "help_string": "an input file", - "argstr": "", - "position": 0, - }, - "y": { - "type": str, - "help_string": "path of output file", - "argstr": "", - "output_field_name": "y_out", - "output_file_template": "{x}_out", - }, - }, - ) - - assert "y_out" in [a.name for a in attrs.fields(A.Outputs)] - - -def get_file_size(y: Path): - result = os.stat(y) - return result.st_size - - -def test_shell_bases_dynamic(A, tmpdir): - B = shell_task( - "B", - output_fields={ - "out_file_size": { - "type": int, - "help_string": "size of the output directory", - "callable": get_file_size, - } - }, - bases=[A], - ) - - xpath = tmpdir / "x.txt" - ypath = tmpdir / "y.txt" - Path.touch(xpath) - - b = B(x=xpath, y=str(ypath)) - - result = b() - - assert b.inputs.x == xpath - assert result.output.y == str(ypath) - - -def test_shell_bases_static(A, tmpdir): - @shell_task - class B(A): - class Outputs: - out_file_size: int = shell_out( - help_string="size of the output directory", callable=get_file_size - ) - - xpath = tmpdir / "x.txt" - ypath = tmpdir / "y.txt" - Path.touch(xpath) - - b = B(x=xpath, y=str(ypath)) - - result = b() - - assert b.inputs.x == xpath - assert result.output.y == str(ypath) - - -def test_shell_inputs_outputs_bases_dynamic(tmpdir): - A = shell_task( - "A", - "ls", - input_fields={ - "directory": { - "type": os.PathLike, - "help_string": "input directory", - "argstr": "", - "position": -1, - } - }, - output_fields={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } - }, - ) - B = shell_task( - "B", - "ls", - input_fields={ - "hidden": { - "type": bool, - "argstr": "-a", - "help_string": "show hidden files", - "default": False, - } - }, - bases=[A], - inputs_bases=[A.Inputs], - ) - - Path.touch(tmpdir / ".hidden") - - b = B(directory=tmpdir, hidden=True) - - assert b.inputs.directory == tmpdir - assert b.inputs.hidden - assert b.cmdline == f"ls -a {tmpdir}" - - result = b() - assert result.output.entries == [".", "..", ".hidden"] - - -def test_shell_inputs_outputs_bases_static(tmpdir): - @shell_task - class A: - executable = "ls" - - class Inputs: - directory: os.PathLike = shell_arg( - help_string="input directory", argstr="", position=-1 - ) - - class Outputs: - entries: list = shell_out( - help_string="list of entries returned by ls command", - callable=list_entries, - ) - - @shell_task - class B(A): - class Inputs(A.Inputs): - hidden: bool = shell_arg( - help_string="show hidden files", - argstr="-a", - default=False, - ) - - Path.touch(tmpdir / ".hidden") - - b = B(directory=tmpdir, hidden=True) - - assert b.inputs.directory == tmpdir - assert b.inputs.hidden - - result = b() - assert result.output.entries == [".", "..", ".hidden"] - - -def test_shell_missing_executable_static(): - with pytest.raises(RuntimeError, match="should contain an `executable`"): - - @shell_task - class A: - class 
Inputs: - directory: os.PathLike = shell_arg( - help_string="input directory", argstr="", position=-1 - ) - - class Outputs: - entries: list = shell_out( - help_string="list of entries returned by ls command", - callable=list_entries, - ) - - -def test_shell_missing_executable_dynamic(): - with pytest.raises(RuntimeError, match="should contain an `executable`"): - A = shell_task( - "A", - executable=None, - input_fields={ - "directory": { - "type": os.PathLike, - "help_string": "input directory", - "argstr": "", - "position": -1, - } - }, - output_fields={ - "entries": { - "type": list, - "help_string": "list of entries returned by ls command", - "callable": list_entries, - } - }, - ) - - -def test_shell_missing_inputs_static(): - with pytest.raises(RuntimeError, match="should contain an `Inputs`"): - - @shell_task - class A: - executable = "ls" - - class Outputs: - entries: list = shell_out( - help_string="list of entries returned by ls command", - callable=list_entries, - ) - - -def test_shell_decorator_misuse(A): - with pytest.raises( - RuntimeError, match=("`shell_task` should not be provided any other arguments") - ): - shell_task(A, executable="cp") diff --git a/pydra/tasks/__init__.py b/pydra/tasks/__init__.py deleted file mode 100644 index fae53c2d92..0000000000 --- a/pydra/tasks/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" Pydra tasks - -The ``pydra.tasks`` namespace is reserved for collections of Tasks, to be managed and -packaged separately. -To create a task package, please fork the `pydra-tasks-template -`__. -""" - -# This call enables pydra.tasks to be used as a namespace package when installed -# in editable mode. In normal installations it has no effect. -__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/pydra/tasks/common/__init__.py b/pydra/tasks/common/__init__.py new file mode 100644 index 0000000000..ff854eeee5 --- /dev/null +++ b/pydra/tasks/common/__init__.py @@ -0,0 +1,7 @@ +from fileformats.application import TextSerialization +from pydra.design import python + + +@python.define +def LoadJson(file: TextSerialization) -> dict | list: + return file.load() From 602c8fe4c88f2e5d03e9fc5aaa1011ee38afdfb6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 21 Dec 2024 22:35:18 +1100 Subject: [PATCH 079/342] fixing up docs --- docs/source/conf.py | 3 +- docs/source/howto/create-task-package.ipynb | 42 ++++++++++----------- docs/source/tutorial/execution.ipynb | 4 +- pydra/tasks/common/__init__.py | 8 ++-- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index a82719036d..552caab751 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -83,7 +83,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: @@ -279,7 +279,6 @@ # If false, no module index is generated. # latex_domain_indices = True - # -- Options for manual page output --------------------------------------- # One entry per manual page. 
List of tuples diff --git a/docs/source/howto/create-task-package.ipynb b/docs/source/howto/create-task-package.ipynb index 6479dbc78f..36777b7084 100644 --- a/docs/source/howto/create-task-package.ipynb +++ b/docs/source/howto/create-task-package.ipynb @@ -1,23 +1,23 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Creating a task package" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Creating a task package" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb index 64aebe9249..3697736388 100644 --- a/docs/source/tutorial/execution.ipynb +++ b/docs/source/tutorial/execution.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -22,7 +22,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapplication\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Json\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LoadJson\n\u001b[0;32m----> 4\u001b[0m load_json \u001b[38;5;241m=\u001b[39m LoadJson(file\u001b[38;5;241m=\u001b[39m\u001b[43mJson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/path/to/my/file.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 5\u001b[0m result \u001b[38;5;241m=\u001b[39m load_json(plugin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserial\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(result\u001b[38;5;241m.\u001b[39moutput)\n", + "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapplication\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Json\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LoadJson\n\u001b[0;32m----> 4\u001b[0m load_json \u001b[38;5;241m=\u001b[39m LoadJson(file\u001b[38;5;241m=\u001b[39m\u001b[43mJson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/path/to/my/file.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 5\u001b[0m result 
\u001b[38;5;241m=\u001b[39m load_json(plugin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserial\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(result\u001b[38;5;241m.\u001b[39moutput)\n", "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1050\u001b[0m, in \u001b[0;36mFileSet.sample\u001b[0;34m(cls, dest_dir, seed, stem)\u001b[0m\n\u001b[1;32m 1047\u001b[0m dest_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n\u001b[1;32m 1048\u001b[0m \u001b[38;5;66;03m# Need to use mock to get an instance in order to use the singledispatch-based\u001b[39;00m\n\u001b[1;32m 1049\u001b[0m \u001b[38;5;66;03m# extra decorator\u001b[39;00m\n\u001b[0;32m-> 1050\u001b[0m fspaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43mSampleFileGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfname_stem\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1054\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(fspaths)\n", "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1083\u001b[0m, in \u001b[0;36mFileSet.sample_data\u001b[0;34m(cls, generator)\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Converts the `generate_sample_data` method into a class method by mocking up\u001b[39;00m\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;124;03ma class instance and calling the method on it\u001b[39;00m\n\u001b[1;32m 1071\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;124;03m the generated file-system paths\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1082\u001b[0m mock: FileSet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmock()\n\u001b[0;32m-> 1083\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_sample_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/extras.py:38\u001b[0m, in \u001b[0;36mextra..decorated\u001b[0;34m(obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m extras \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tp \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mreferenced_types(): \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m extras\u001b[38;5;241m.\u001b[39mappend(\u001b[43mimport_extras_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtp\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dispatch_method(obj, \u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", diff --git a/pydra/tasks/common/__init__.py b/pydra/tasks/common/__init__.py index ff854eeee5..e335197ea2 100644 --- a/pydra/tasks/common/__init__.py +++ b/pydra/tasks/common/__init__.py @@ -1,7 +1,9 @@ -from fileformats.application import TextSerialization +import json +from fileformats.application import Json from pydra.design import python @python.define -def LoadJson(file: TextSerialization) -> dict | list: - return file.load() +def LoadJson(file: Json) -> dict | list: + with open(file.path) as file: + return json.load(file) From b4d2b16621fc7c883254399c8d83e807062d4c4e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 21 Dec 2024 22:36:09 +1100 Subject: [PATCH 080/342] removed state references from execution code (they will be handled before the task is initialised by the workflow) --- pydra/engine/helpers.py | 20 +++++--------------- pydra/engine/specs.py | 3 ++- pydra/engine/submitter.py | 33 +++++++++++---------------------- pydra/engine/workers.py | 4 ++-- 4 files changed, 20 insertions(+), 40 deletions(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index c03ade9205..04b0513afa 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -6,7 +6,6 @@ import os import inspect import sys -from uuid import uuid4 import getpass import typing as ty import subprocess as sp @@ -414,9 +413,7 @@ def get_available_cpus(): return os.cpu_count() -def load_and_run( - task_pkl, ind=None, rerun=False, submitter=None, plugin=None, **kwargs -): +def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): """ loading a task from a pickle file, settings proper input and running the task @@ -425,7 +422,7 @@ def load_and_run( from .specs import Result try: - task = load_task(task_pkl=task_pkl, ind=ind) + task = load_task(task_pkl=task_pkl) except Exception: if task_pkl.parent.exists(): etype, eval, etr = sys.exc_info() @@ -455,27 +452,20 @@ def load_and_run( return resultfile -async def load_and_run_async(task_pkl, ind=None, submitter=None, rerun=False, **kwargs): +async def load_and_run_async(task_pkl, submitter=None, rerun=False, **kwargs): """ loading a task from a pickle file, settings proper input and running the workflow """ - task = load_task(task_pkl=task_pkl, ind=ind) + task = load_task(task_pkl=task_pkl) await task._run(submitter=submitter, rerun=rerun, **kwargs) -def load_task(task_pkl, ind=None): +def load_task(task_pkl): """loading a task from a pickle file, settings proper input for the specific ind""" if isinstance(task_pkl, str): task_pkl = Path(task_pkl) task = cp.loads(task_pkl.read_bytes()) - if ind is not None: - ind_inputs = task.get_input_el(ind) - task.inputs = attrs.evolve(task.inputs, **ind_inputs) - task._pre_split = True - task.state = None - # resetting uid for task - task._uid = uuid4().hex return task diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 01521a73d0..22331afcc9 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -368,7 +368,8 @@ def __getstate__(self): def __setstate__(self, state): if state["output"] is not None: state["output"] = cp.loads(state["output"]) - self.__dict__.update(state) + for name, val in state.items(): + setattr(self, name, val) def get_output_field(self, field_name): """Used in get_values in Workflow diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index f92b996bd7..0b7f2c9841 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -88,12 +88,7 @@ async def 
submit_from_call(self, runnable, rerun, environment): runnable._reset() else: # 2 - if runnable.state is None: - # run_el should always return a coroutine - await self.worker.run_el(runnable, rerun=rerun, environment=environment) - # 3 - else: - await self.expand_runnable(runnable, wait=True, rerun=rerun) # TODO + await self.expand_runnable(runnable, wait=True, rerun=rerun) # TODO return True async def expand_runnable(self, runnable, wait=False, rerun=False): @@ -123,20 +118,17 @@ async def expand_runnable(self, runnable, wait=False, rerun=False): raise NotImplementedError() futures = set() - if runnable.state is None: - raise Exception("Only runnables with state should reach here") - task_pkl = await prepare_runnable_with_state(runnable) + task_pkl = await prepare_runnable(runnable) - for sidx in range(len(runnable.state.states_val)): - if is_workflow(runnable): - # job has no state anymore - futures.add( - # This unpickles and runs workflow - why are we pickling? - asyncio.create_task(load_and_run_async(task_pkl, sidx, self, rerun)) - ) - else: - futures.add(self.worker.run_el((sidx, task_pkl, runnable), rerun=rerun)) + if is_workflow(runnable): + # job has no state anymore + futures.add( + # This unpickles and runs workflow - why are we pickling? + asyncio.create_task(load_and_run_async(task_pkl, self, rerun)) + ) + else: + futures.add(self.worker.run_el((task_pkl, runnable), rerun=rerun)) if wait and futures: # if wait is True, we are at the end of the graph / state expansion. @@ -338,10 +330,7 @@ def is_runnable(graph, obj): return True -async def prepare_runnable_with_state(runnable): - runnable.state.prepare_states(runnable.inputs, cont_dim=runnable.cont_dim) - runnable.state.prepare_inputs() - logger.debug(f"Expanding {runnable} into {len(runnable.state.states_val)} states") +async def prepare_runnable(runnable): return runnable.pickle_task() diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 30cf65ea86..e6bf3cced7 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -145,8 +145,8 @@ async def exec_serial(self, runnable, rerun=False, environment=None): if isinstance(runnable, Task): return runnable._run(rerun, environment=environment) else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, _ = runnable - return load_and_run(task_main_pkl, ind, rerun, environment=environment) + task_main_pkl, _ = runnable + return load_and_run(task_main_pkl, rerun, environment=environment) async def fetch_finished(self, futures): await asyncio.gather(*futures) From fb86964d61f2d4fe3c6920bd7bd984a81adf1ca8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 21 Dec 2024 22:53:35 +1100 Subject: [PATCH 081/342] debugging test utils --- pydra/engine/tests/utils.py | 84 +++++++++++++++++-------------------- 1 file changed, 38 insertions(+), 46 deletions(-) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 64fe7591c3..b88dc43348 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -10,8 +10,7 @@ import pytest from fileformats.generic import File from ..submitter import Submitter -from pydra.design import workflow -from pydra import mark +from pydra.design import workflow, python need_docker = pytest.mark.skipif( @@ -56,12 +55,12 @@ def result_submitter(shell_task, plugin): DOT_FLAG = False -@mark.task +@python.define def op_4var(a, b, c, d) -> str: return f"{a} {b} {c} {d}" -@mark.task +@python.define def fun_addtwo(a: int) -> int: import time @@ -71,7 +70,7 @@ def fun_addtwo(a: 
int) -> int: return a + 2 -@mark.task +@python.define def fun_addtwo_notype(a): import time @@ -81,7 +80,7 @@ def fun_addtwo_notype(a): return a + 2 -@mark.task +@python.define def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: import time @@ -91,25 +90,24 @@ def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: return a + 2 -@mark.task +@python.define def fun_addvar( a: ty.Union[int, float], b: ty.Union[int, float] ) -> ty.Union[int, float]: return a + b -@mark.task +@python.define def fun_addvar_notype(a, b): return a + b -@mark.task -@mark.annotate({"return": {"sum": float, "sub": float}}) +@python.define(outputs={"sum": float, "sub": float}) def fun_addsubvar(a: float, b: float): return a + b, a - b -@mark.task +@python.define def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: if b is None: return a @@ -117,131 +115,127 @@ def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: return a + b -@mark.task +@python.define def fun_addvar_default(a: int, b: int = 1) -> int: return a + b -@mark.task +@python.define def fun_addvar_default_notype(a, b=1): return a + b -@mark.task +@python.define def fun_addvar3(a: int, b: int, c: int) -> int: return a + b + c -@mark.task +@python.define def fun_addvar4(a: int, b: int, c: int, d: int) -> int: return a + b + c + d -@mark.task +@python.define def moment(lst: ty.List[float], n: float) -> float: return sum([i**n for i in lst]) / len(lst) -@mark.task +@python.define def fun_div(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: return a / b -@mark.task +@python.define def multiply(x: int, y: int) -> int: return x * y -@mark.task +@python.define def multiply_list(x: list, y: int) -> list: return x * y -@mark.task +@python.define def multiply_mixed(x: list, y: int) -> list: return x * y -@mark.task +@python.define def add2(x: int) -> int: if x == 1 or x == 12: time.sleep(1) return x + 2 -@mark.task +@python.define def raise_xeq1(x: int) -> int: if x == 1: raise Exception("x is 1, so i'm raising an exception!") return x -@mark.task -@mark.annotate({"return": {"out_add": float, "out_sub": float}}) +@python.define(outputs={"out_add": float, "out_sub": float}) def add2_sub2_res(res): """function that takes entire output as an input""" return res["out"] + 2, res["out"] - 2 -@mark.task -@mark.annotate({"return": {"out_add": ty.List[float], "out_sub": ty.List[float]}}) +@python.define(outputs={"out_add": ty.List[float], "out_sub": ty.List[float]}) def add2_sub2_res_list(res): """function that takes entire output as an input""" return [r["out"] + 2 for r in res], [r["out"] - 2 for r in res] -@mark.task +@python.define def power(a: int, b: int) -> int: return a**b -@mark.task +@python.define def identity(x): return x -@mark.task -def identity_2flds( - x1, x2 -) -> ty.NamedTuple("Output", [("out1", ty.Any), ("out2", ty.Any)]): +@python.define(outputs={"out1": ty.Any, "out2": ty.Any}) +def identity_2flds(x1, x2): return x1, x2 -@mark.task +@python.define def ten(x) -> int: return 10 -@mark.task +@python.define def add2_wait(x: int) -> int: time.sleep(2) return x + 2 -@mark.task +@python.define def list_output(x: int) -> ty.List[int]: return [x, 2 * x, 3 * x] -@mark.task +@python.define def list_sum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: return sum(x) -@mark.task +@python.define def fun_dict(d: dict) -> str: kv_list = [f"{k}:{v}" for (k, v) in d.items()] return "_".join(kv_list) -@mark.task +@python.define def fun_write_file(filename: Path, text="hello") -> File: with open(filename, "w") as 
f: f.write(text) return File(filename) -@mark.task +@python.define def fun_write_file_list( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.List[File]: @@ -252,7 +246,7 @@ def fun_write_file_list( return filename_list -@mark.task +@python.define def fun_write_file_list2dict( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.Dict[str, ty.Union[File, int]]: @@ -266,14 +260,14 @@ def fun_write_file_list2dict( return filename_dict -@mark.task +@python.define def fun_file(filename: File): with open(filename) as f: txt = f.read() return txt -@mark.task +@python.define def fun_file_list(filename_list: ty.List[File]): txt_list = [] for filename in filename_list: @@ -349,14 +343,12 @@ def Workflow(x): return Workflow(x=5) -@mark.task -@mark.annotate({"return": {"sum": int, "products": ty.List[int]}}) +@python.define(outputs={"sum": int, "products": ty.List[int]}) def list_mult_sum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: products = [scalar * x for x in in_list] return functools.reduce(operator.add, products, 0), products -@mark.task -@mark.annotate({"return": {"x": str, "y": int, "z": float}}) +@python.define(outputs={"x": str, "y": int, "z": float}) def foo(a: str, b: int, c: float) -> ty.Tuple[str, int, float]: return a, b, c From 97c6f1107a32717b38607730f0742581f981001c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 21 Dec 2024 22:54:01 +1100 Subject: [PATCH 082/342] removed implicit extraction of output names from return values --- pydra/design/base.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 61394e7a68..996fface4e 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -783,19 +783,6 @@ def extract_function_inputs_and_outputs( else: inputs[inpt_name] = arg_type(type=inpt, default=default) return_type = type_hints.get("return", ty.Any) - if outputs is None: - src = inspect.getsource(function).strip() - return_lines = re.findall(r"\n\s+return .*$", src) - if len(return_lines) == 1 and src.endswith(return_lines[0]): - return_line = return_lines[0].split("#")[0] - implicit_outputs = [ - o.strip() - for o in re.match(r"\s*return\s+(.*)", return_line).group(1).split(",") - ] - if len(implicit_outputs) and all( - re.match(r"^\w+$", o) for o in implicit_outputs - ): - outputs = implicit_outputs if outputs and len(outputs) > 1: if return_type is not ty.Any: if ty.get_origin(return_type) is not tuple: From 0cfcc5f3882916f3a4d9a140dac515c767af2186 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 21 Dec 2024 22:57:07 +1100 Subject: [PATCH 083/342] cleaned up all @mark.task to @python.define --- pydra/engine/tests/test_profiles.py | 4 +- pydra/engine/tests/test_specs.py | 8 +- pydra/engine/tests/test_submitter.py | 24 ++--- pydra/engine/tests/test_task.py | 122 ++++++++++++------------- pydra/engine/tests/test_tasks_files.py | 14 +-- pydra/engine/tests/test_workflow.py | 10 +- pydra/utils/tests/test_typing.py | 4 +- pydra/utils/tests/utils.py | 4 +- 8 files changed, 95 insertions(+), 95 deletions(-) diff --git a/pydra/engine/tests/test_profiles.py b/pydra/engine/tests/test_profiles.py index 0a0f5e17b5..19de274318 100644 --- a/pydra/engine/tests/test_profiles.py +++ b/pydra/engine/tests/test_profiles.py @@ -1,5 +1,5 @@ from ..helpers import load_task -from pydra import mark +from pydra.design import python import numpy as np from pympler import asizeof @@ -10,7 +10,7 @@ def generate_list(l): return np.arange(l).tolist() -@mark.task +@python.define def 
show_var(a): return a diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 1e054a4eb2..6e1bf8f95e 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -21,7 +21,7 @@ # from ..helpers import make_klass from .utils import foo -from pydra import mark +from pydra.design import python import pytest @@ -352,7 +352,7 @@ def test_lazy_field_cast(): def test_lazy_field_multi_same_split(): - @mark.task + @python.define def f(x: ty.List[int]) -> ty.List[int]: return x @@ -369,7 +369,7 @@ def f(x: ty.List[int]) -> ty.List[int]: def test_lazy_field_multi_diff_split(): - @mark.task + @python.define def f(x: ty.Any, y: ty.Any) -> ty.Any: return x @@ -390,7 +390,7 @@ def f(x: ty.Any, y: ty.Any) -> ty.Any: def test_wf_lzin_split(): - @mark.task + @python.define def identity(x: int) -> int: return x diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 4848845007..63a5c8cf3e 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -20,12 +20,12 @@ from ..core import Task from ..submitter import Submitter from ..workers import SerialWorker -from pydra import mark +from pydra.design import python from pathlib import Path from datetime import datetime -@mark.task +@python.define def sleep_add_one(x): time.sleep(1) return x + 1 @@ -302,7 +302,7 @@ def test_slurm_args_2(tmpdir): sub(task) -@mark.task +@python.define def sleep(x, job_name_part): time.sleep(x) import subprocess as sp @@ -319,7 +319,7 @@ def sleep(x, job_name_part): return x -@mark.task +@python.define def cancel(job_name_part): import subprocess as sp @@ -580,7 +580,7 @@ def test_sge_no_limit_maxthreads(tmpdir): def test_hash_changes_in_task_inputs_file(tmp_path): - @mark.task + @python.define def output_dir_as_input(out_dir: Directory) -> Directory: (out_dir.fspath / "new-file.txt").touch() return out_dir @@ -599,7 +599,7 @@ def __bytes_repr__(self, cache) -> ty.Iterator[bytes]: """Random 128-bit bytestring""" yield secrets.token_bytes(16) - @mark.task + @python.define def unstable_input(unstable: Unstable) -> int: return unstable.value @@ -609,7 +609,7 @@ def unstable_input(unstable: Unstable) -> int: def test_hash_changes_in_workflow_inputs(tmp_path): - @mark.task + @python.define def output_dir_as_output(out_dir: Path) -> Directory: (out_dir / "new-file.txt").touch() return out_dir @@ -639,17 +639,17 @@ def __bytes_repr__(self, cache): hopefully cases like this will be very rare""" yield bytes(self.value) - @mark.task + @python.define @mark.annotate({"return": {"x": X, "y": int}}) def identity(x: X) -> ty.Tuple[X, int]: return x, 99 - @mark.task + @python.define def alter_x(y): X.value = 2 return y - @mark.task + @python.define def to_tuple(x, y): return (x, y) @@ -670,7 +670,7 @@ def to_tuple(x, y): result = sub(wf) -@mark.task +@python.define def to_tuple(x, y): return (x, y) @@ -693,7 +693,7 @@ async def exec_serial(self, runnable, rerun=False, environment=None): return super().exec_serial(runnable, rerun, environment) -@mark.task +@python.define def add_env_var_task(x: int) -> int: return x + int(os.environ.get("BYO_ADD_VAR", 0)) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 77d8529bd6..acea1497ad 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -7,7 +7,7 @@ from pathlib import Path import json import glob as glob -from pydra import mark +from pydra.design import python from pydra.utils.messenger import FileMessenger, 
PrintMessenger, collect_messages from ..task import AuditFlag, ShellTask from pydra.engine.specs import argstr_formatting @@ -29,7 +29,7 @@ ) -@mark.task +@python.define def funaddtwo(a): return a + 2 @@ -71,7 +71,7 @@ def test_checksum(): def test_annotated_func(): - @mark.task + @python.define def testfunc( a: int, b: float = 0.1 ) -> ty.NamedTuple("Output", [("out_out", float)]): @@ -117,7 +117,7 @@ def testfunc( def test_annotated_func_dictreturn(): """Test mapping from returned dictionary to output spec.""" - @mark.task + @python.define @mark.annotate({"return": {"sum": int, "mul": ty.Optional[int]}}) def testfunc(a: int, b: int): return dict(sum=a + b, diff=a - b) @@ -138,7 +138,7 @@ def testfunc(a: int, b: int): def test_annotated_func_multreturn(): """the function has two elements in the return statement""" - @mark.task + @python.define def testfunc( a: float, ) -> ty.NamedTuple("Output", [("fractional", float), ("integer", int)]): @@ -177,7 +177,7 @@ def testfunc( def test_annotated_input_func_1(): """the function with annotated input (float)""" - @mark.task + @python.define def testfunc(a: float): return a @@ -188,7 +188,7 @@ def testfunc(a: float): def test_annotated_input_func_2(): """the function with annotated input (int, but float provided)""" - @mark.task + @python.define def testfunc(a: int): return a @@ -199,7 +199,7 @@ def testfunc(a: int): def test_annotated_input_func_2a(): """the function with annotated input (int, but float provided)""" - @mark.task + @python.define def testfunc(a: int): return a @@ -211,7 +211,7 @@ def testfunc(a: int): def test_annotated_input_func_3(): """the function with annotated input (list)""" - @mark.task + @python.define def testfunc(a: list): return sum(a) @@ -222,7 +222,7 @@ def testfunc(a: list): def test_annotated_input_func_3a(): """the function with annotated input (list of floats)""" - @mark.task + @python.define def testfunc(a: ty.List[float]): return sum(a) @@ -235,7 +235,7 @@ def test_annotated_input_func_3b(): (list of floats - int and float provided, should be fine) """ - @mark.task + @python.define def testfunc(a: ty.List[float]): return sum(a) @@ -248,7 +248,7 @@ def test_annotated_input_func_3c_excep(): (list of ints - int and float provided, should raise an error) """ - @mark.task + @python.define def testfunc(a: ty.List[int]): return sum(a) @@ -259,7 +259,7 @@ def testfunc(a: ty.List[int]): def test_annotated_input_func_4(): """the function with annotated input (dictionary)""" - @mark.task + @python.define def testfunc(a: dict): return sum(a.values()) @@ -270,7 +270,7 @@ def testfunc(a: dict): def test_annotated_input_func_4a(): """the function with annotated input (dictionary of floats)""" - @mark.task + @python.define def testfunc(a: ty.Dict[str, float]): return sum(a.values()) @@ -281,7 +281,7 @@ def testfunc(a: ty.Dict[str, float]): def test_annotated_input_func_4b_excep(): """the function with annotated input (dictionary of ints, but float provided)""" - @mark.task + @python.define def testfunc(a: ty.Dict[str, int]): return sum(a.values()) @@ -295,7 +295,7 @@ def test_annotated_input_func_5(): so no error for 3.5 """ - @mark.task + @python.define def testfunc(a: ty.Dict[str, ty.List]): return sum(a["el1"]) @@ -308,7 +308,7 @@ def test_annotated_input_func_5a_except(): list is provided as a dict value (instead a dict), so error is raised """ - @mark.task + @python.define def testfunc(a: ty.Dict[str, ty.Dict[str, float]]): return sum(a["el1"]) @@ -321,7 +321,7 @@ def test_annotated_input_func_6(): the validator should 
unpack values from the Union """ - @mark.task + @python.define def testfunc(a: ty.Dict[str, ty.Union[float, int]]): return sum(a["el1"]) @@ -334,7 +334,7 @@ def test_annotated_input_func_6a_excep(): the validator should unpack values from the Union and raise an error for 3.5 """ - @mark.task + @python.define def testfunc(a: ty.Dict[str, ty.Union[str, int]]): return sum(a["el1"]) @@ -348,7 +348,7 @@ def test_annotated_input_func_7(): it should work, the validator tries to guess if this is a field with a splitter """ - @mark.task + @python.define def testfunc(a: float): return a @@ -361,7 +361,7 @@ def test_annotated_input_func_7a_excep(): list of float provided - should raise an error (list of int would be fine) """ - @mark.task + @python.define def testfunc(a: int): return a @@ -374,7 +374,7 @@ def test_annotated_input_func_8(): a single value is provided and should be converted to a list """ - @mark.task + @python.define def testfunc(a: MultiInputObj): return len(a) @@ -389,7 +389,7 @@ def test_annotated_input_func_8a(): a 1-el list is provided so shouldn't be changed """ - @mark.task + @python.define def testfunc(a: MultiInputObj): return len(a) @@ -405,7 +405,7 @@ def test_annotated_input_func_8b(): (input should still be converted to a list) """ - @mark.task + @python.define def testfunc(a: MultiInputObj): return len(a) @@ -422,7 +422,7 @@ def test_annotated_func_multreturn_exception(): but three element provided in the spec - should raise an error """ - @mark.task + @python.define def testfunc( a: float, ) -> ty.NamedTuple( @@ -439,7 +439,7 @@ def testfunc( def test_halfannotated_func(): - @mark.task + @python.define def testfunc(a, b) -> int: return a + b @@ -480,7 +480,7 @@ def testfunc(a, b) -> int: def test_halfannotated_func_multreturn(): - @mark.task + @python.define def testfunc(a, b) -> (int, int): return a + 1, b + 1 @@ -522,7 +522,7 @@ def testfunc(a, b) -> (int, int): def test_notannotated_func(): - @mark.task + @python.define def no_annots(c, d): return c + d @@ -538,7 +538,7 @@ def no_annots(c, d): def test_notannotated_func_returnlist(): - @mark.task + @python.define def no_annots(c, d): return [c, d] @@ -549,7 +549,7 @@ def no_annots(c, d): def test_halfannotated_func_multrun_returnlist(): - @mark.task + @python.define def no_annots(c, d) -> (list, float): return [c, d], c + d @@ -567,7 +567,7 @@ def test_notannotated_func_multreturn(): all elements should be returned as a tuple and set to "out" """ - @mark.task + @python.define def no_annots(c, d): return c + d, c - d @@ -585,7 +585,7 @@ def no_annots(c, d): def test_input_spec_func_1(): """the function w/o annotated, but input_spec is used""" - @mark.task + @python.define def testfunc(a): return a @@ -604,7 +604,7 @@ def test_input_spec_func_1a_except(): a TypeError is raised (float is provided instead of int) """ - @mark.task + @python.define def testfunc(a): return a @@ -622,7 +622,7 @@ def test_input_spec_func_1b_except(): metadata checks raise an error """ - @mark.task + @python.define def testfunc(a): return a @@ -645,7 +645,7 @@ def test_input_spec_func_1d_except(): input_spec doesn't contain 'a' input, an error is raised """ - @mark.task + @python.define def testfunc(a): return a @@ -660,7 +660,7 @@ def test_input_spec_func_2(): input_spec changes the type of the input (so error is not raised) """ - @mark.task + @python.define def testfunc(a: int): return a @@ -680,7 +680,7 @@ def test_input_spec_func_2a(): using the shorter syntax """ - @mark.task + @python.define def testfunc(a: int): return a @@ -699,7 
+699,7 @@ def test_input_spec_func_3(): additional keys (allowed_values) are used in metadata """ - @mark.task + @python.define def testfunc(a): return a @@ -726,7 +726,7 @@ def test_input_spec_func_3a_except(): allowed_values is used in metadata and the ValueError is raised """ - @mark.task + @python.define def testfunc(a): return a @@ -753,7 +753,7 @@ def test_input_spec_func_4(): but b is set as mandatory in the input_spec, so error is raised if not provided """ - @mark.task + @python.define def testfunc(a, b=1): return a + b @@ -786,7 +786,7 @@ def test_input_spec_func_4a(): has a different default value, so value from the function is overwritten """ - @mark.task + @python.define def testfunc(a, b=1): return a + b @@ -814,7 +814,7 @@ def test_input_spec_func_5(): a single value is provided and should be converted to a list """ - @mark.task + @python.define def testfunc(a): return len(a) @@ -835,7 +835,7 @@ def testfunc(a): def test_output_spec_func_1(): """the function w/o annotated, but output_spec is used""" - @mark.task + @python.define def testfunc(a): return a @@ -855,7 +855,7 @@ def test_output_spec_func_1a_except(): float returned instead of int - TypeError """ - @mark.task + @python.define def testfunc(a): return a @@ -875,7 +875,7 @@ def test_output_spec_func_2(): output_spec changes the type of the output (so error is not raised) """ - @mark.task + @python.define def testfunc(a) -> int: return a @@ -896,7 +896,7 @@ def test_output_spec_func_2a(): using a shorter syntax """ - @mark.task + @python.define def testfunc(a) -> int: return a @@ -916,7 +916,7 @@ def test_output_spec_func_3(): MultiOutputObj is used, output is a 2-el list, so converter doesn't do anything """ - @mark.task + @python.define def testfunc(a, b): return [a, b] @@ -941,7 +941,7 @@ def test_output_spec_func_4(): MultiOutputObj is used, output is a 1el list, so converter return the element """ - @mark.task + @python.define def testfunc(a): return [a] @@ -962,7 +962,7 @@ def testfunc(a): def test_exception_func(): - @mark.task + @python.define def raise_exception(c, d): raise Exception() @@ -973,7 +973,7 @@ def raise_exception(c, d): def test_result_none_1(): """checking if None is properly returned as the result""" - @mark.task + @python.define def fun_none(x): return None @@ -985,7 +985,7 @@ def fun_none(x): def test_result_none_2(): """checking if None is properly set for all outputs""" - @mark.task + @python.define def fun_none(x) -> (ty.Any, ty.Any): return None @@ -998,7 +998,7 @@ def fun_none(x) -> (ty.Any, ty.Any): def test_audit_prov( tmpdir, ): - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1020,7 +1020,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_task(tmpdir): - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1200,7 +1200,7 @@ def test_audit_prov_messdir_1( ): """customized messenger dir""" - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1228,7 +1228,7 @@ def test_audit_prov_messdir_2( ): """customized messenger dir in init""" - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1260,7 +1260,7 @@ def test_audit_prov_wf( ): """FileMessenger for wf""" - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> 
ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1287,7 +1287,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_all( tmpdir, ): - @mark.task + @python.define def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1480,7 +1480,7 @@ def test_traceback(tmpdir): full traceback including the line in the python function """ - @mark.task + @python.define def fun_error(x): raise Exception("Error from the function") @@ -1507,7 +1507,7 @@ def test_traceback_wf(tmpdir): full traceback including the line in the python function """ - @mark.task + @python.define def fun_error(x): raise Exception("Error from the function") @@ -1534,7 +1534,7 @@ def test_rerun_errored(tmpdir, capfd): """Test rerunning a task containing errors. Only the errored tasks should be rerun""" - @mark.task + @python.define def pass_odds(x): if x % 2 == 0: print(f"x%2 = {x % 2} (error)\n") @@ -1576,7 +1576,7 @@ class A: def test_object_input(): """Test function tasks with object inputs""" - @mark.task + @python.define def testfunc(a: A): return a.x diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index 697afae9c3..2ed98a1388 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -5,21 +5,21 @@ import typing as ty from ..submitter import Submitter -from pydra import mark +from pydra.design import python from fileformats.generic import File, Directory -@mark.task +@python.define def dir_count_file(dirpath): return len(os.listdir(dirpath)) -@mark.task +@python.define def dir_count_file_annot(dirpath: Directory): return len(os.listdir(dirpath)) -@mark.task +@python.define def file_add2(file): array_inp = np.load(file) array_out = array_inp + 2 @@ -30,7 +30,7 @@ def file_add2(file): return file_out -@mark.task +@python.define def file_mult(file): array_inp = np.load(file) array_out = 10 * array_inp @@ -40,7 +40,7 @@ def file_mult(file): return file_out -@mark.task +@python.define def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): array_inp = np.load(file) array_out = array_inp + 2 @@ -51,7 +51,7 @@ def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): return file_out -@mark.task +@python.define def file_mult_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): array_inp = np.load(file) array_out = 10 * array_inp diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index d106134eb6..cfbaa4ad6f 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -34,7 +34,7 @@ DOT_FLAG, ) from ..submitter import Submitter -from pydra import mark +from pydra.design import python from ..specs import ShellSpec from pydra.utils import exc_info_matches @@ -4835,7 +4835,7 @@ def test_duplicate_input_on_split_wf(tmpdir): """ text = ["test"] * 2 - @mark.task + @python.define def printer(a): return a @@ -4862,13 +4862,13 @@ def test_inner_outer_wf_duplicate(tmpdir): task_list = ["First", "Second"] start_list = [3, 4] - @mark.task + @python.define def one_arg(start_number): for k in range(10): start_number += 1 return start_number - @mark.task + @python.define def one_arg_inner(start_number): for k in range(10): start_number += 1 @@ -4911,7 +4911,7 @@ def test_rerun_errored(tmpdir, capfd): """Test rerunning a workflow containing errors. 
Only the errored tasks and workflow should be rerun""" - @mark.task + @python.define def pass_odds(x): if x % 2 == 0: print(f"x%2 = {x % 2} (error)\n") diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index 56e84ba4d2..74ec853a84 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -5,7 +5,7 @@ from pathlib import Path import tempfile import pytest -from pydra import mark +from pydra.design import python from pydra.engine.specs import File from pydra.engine.workflow.lazy import LazyOutField from ..typing import TypeParser, MultiInputObj @@ -520,7 +520,7 @@ def test_type_coercion_realistic(): Path.touch(yet_another_file) file_list = [File(p) for p in (a_file, another_file, yet_another_file)] - @mark.task + @python.define @mark.annotate({"return": {"a": ty.List[File], "b": ty.List[str]}}) def f(x: ty.List[File], y: ty.Dict[str, ty.List[File]]): return list(itertools.chain(x, *y.values())), list(y.keys()) diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 169867fce6..2cd5ad357e 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -1,6 +1,6 @@ from fileformats.generic import File from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber -from pydra import mark +from pydra.design import python from pydra.engine.task import ShellTask from pydra.engine import specs from pydra.design import shell, python @@ -51,7 +51,7 @@ class Outputs(specs.ShellOutputs): executable = "echo" -@mark.task +@python.define def specific_func_task(in_file: MyFormatX) -> MyFormatX: return in_file From 4908d2117ba0c4f82693f006526daac18c1ab0af Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 24 Dec 2024 11:51:51 +1100 Subject: [PATCH 084/342] changed supported version to Py3.11, use Exception.add_note instead of reraising exceptions --- pydra/engine/core.py | 1 - pydra/engine/helpers.py | 8 +++----- pyproject.toml | 8 ++------ 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index e421e2fff8..40c059d076 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -367,7 +367,6 @@ def __call__( if submitter: with submitter as sub: - self.spec = attr.evolve(self.spec, **kwargs) res = sub(self, environment=environment) else: # tasks without state could be run without a submitter res = self._run(rerun=rerun, environment=environment, **kwargs) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 04b0513afa..c0536be299 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -435,7 +435,7 @@ def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): resultfile = task.output_dir / "_result.pklz" try: task(rerun=rerun, plugin=plugin, submitter=submitter, **kwargs) - except Exception as excinfo: + except Exception as e: # creating result and error files if missing errorfile = task.output_dir / "_error.pklz" if not errorfile.exists(): # not sure if this is needed @@ -445,10 +445,8 @@ def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): if not resultfile.exists(): # not sure if this is needed result = Result(output=None, runtime=None, errored=True) save(task.output_dir, result=result) - raise type(excinfo)( - str(excinfo.with_traceback(None)), - f" full crash report is here: {errorfile}", - ) + e.add_note(f" full crash report is here: {errorfile}") + raise return resultfile diff --git a/pyproject.toml b/pyproject.toml index 49bb7eaf1c..eb162c269d 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -6,17 +6,14 @@ build-backend = "hatchling.build" name = "pydra" description = "Pydra dataflow engine" readme = "README.rst" -requires-python = ">=3.10" +requires-python = ">=3.11" dependencies = [ "attrs >=24.2.0", "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.8", - "importlib_resources >=5.7; python_version < '3.11'", + "fileformats >=0.14", "platformdirs >=2", - "typing_extensions >=4.6.3; python_version < '3.10'", - "typing_utils >=0.1.0; python_version < '3.10'", ] license = { file = "LICENSE" } authors = [{ name = "Nipype developers", email = "neuroimaging@python.org" }] @@ -32,7 +29,6 @@ classifiers = [ "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", From 44d8b679774004eadc48482063c89cc4c5b24fce Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 24 Dec 2024 11:55:49 +1100 Subject: [PATCH 085/342] got execution notebook example to work --- docs/source/tutorial/execution.ipynb | 45 +++++++++++++--------------- docs/tst.py | 14 +++++++++ pydra/tasks/common/__init__.py | 4 +-- 3 files changed, 37 insertions(+), 26 deletions(-) create mode 100644 docs/tst.py diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb index 3697736388..441e1d890d 100644 --- a/docs/source/tutorial/execution.ipynb +++ b/docs/source/tutorial/execution.ipynb @@ -12,31 +12,21 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'pydra.mark'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapplication\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Json\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LoadJson\n\u001b[0;32m----> 4\u001b[0m load_json \u001b[38;5;241m=\u001b[39m LoadJson(file\u001b[38;5;241m=\u001b[39m\u001b[43mJson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/path/to/my/file.json\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 5\u001b[0m result \u001b[38;5;241m=\u001b[39m load_json(plugin\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserial\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28mprint\u001b[39m(result\u001b[38;5;241m.\u001b[39moutput)\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1050\u001b[0m, in \u001b[0;36mFileSet.sample\u001b[0;34m(cls, dest_dir, seed, stem)\u001b[0m\n\u001b[1;32m 1047\u001b[0m dest_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n\u001b[1;32m 1048\u001b[0m 
\u001b[38;5;66;03m# Need to use mock to get an instance in order to use the singledispatch-based\u001b[39;00m\n\u001b[1;32m 1049\u001b[0m \u001b[38;5;66;03m# extra decorator\u001b[39;00m\n\u001b[0;32m-> 1050\u001b[0m fspaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m \u001b[49m\u001b[43mSampleFileGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfname_stem\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1054\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(fspaths)\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1083\u001b[0m, in \u001b[0;36mFileSet.sample_data\u001b[0;34m(cls, generator)\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Converts the `generate_sample_data` method into a class method by mocking up\u001b[39;00m\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;124;03ma class instance and calling the method on it\u001b[39;00m\n\u001b[1;32m 1071\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;124;03m the generated file-system paths\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1082\u001b[0m mock: FileSet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmock()\n\u001b[0;32m-> 1083\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_sample_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/extras.py:38\u001b[0m, in \u001b[0;36mextra..decorated\u001b[0;34m(obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m extras \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tp \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mreferenced_types(): \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m extras\u001b[38;5;241m.\u001b[39mappend(\u001b[43mimport_extras_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtp\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dispatch_method(obj, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/utils.py:230\u001b[0m, in \u001b[0;36mimport_extras_module\u001b[0;34m(klass)\u001b[0m\n\u001b[1;32m 228\u001b[0m extras_pypi \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfileformats-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msub_pkg\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m 
\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-extras\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 230\u001b[0m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras_pkg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo module named \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mextras_pkg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n", - "File \u001b[0;32m:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", - "File \u001b[0;32m:995\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", - "File \u001b[0;32m:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n", - "File \u001b[0;32m~/git/workflows/fileformats/extras/fileformats/extras/application/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m archive\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m serialization\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m medical\n", - "File \u001b[0;32m~/git/workflows/fileformats/extras/fileformats/extras/application/archive.py:9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mattrs\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmark\u001b[39;00m\n\u001b[1;32m 10\u001b[0m 
\u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecs\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FsObject\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pydra.mark'" + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev71+g0dc7ec60.d20241216\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]})\n" ] } ], @@ -44,9 +34,16 @@ "from fileformats.application import Json\n", "from pydra.tasks.common import LoadJson\n", "\n", - "load_json = LoadJson(file=Json.sample(\"/path/to/my/file.json\"))\n", - "result = load_json(plugin=\"serial\")\n", + "# Create a sample JSON file to test\n", + "json_file = Json.sample()\n", + "\n", + "# Parameterise the task to load the JSON file\n", + "load_json = LoadJson(file=json_file)\n", + "\n", + "# Run the task\n", + "result = load_json()\n", "\n", + "# Print the output interface of the of the task (LoadJson.Outputs)\n", "print(result.output)" ] }, diff --git a/docs/tst.py b/docs/tst.py new file mode 100644 index 0000000000..c9d11c8773 --- /dev/null +++ b/docs/tst.py @@ -0,0 +1,14 @@ +from fileformats.application import Json +from pydra.tasks.common import LoadJson + +# Create a sample JSON file to test +json_file = Json.sample() + +# Parameterise the task to load the JSON file +load_json = LoadJson(file=json_file) + +# Run the task +result = load_json(plugin="serial") + +# Print the output interface of the of the task (LoadJson.Outputs) +print(result.output) diff --git a/pydra/tasks/common/__init__.py b/pydra/tasks/common/__init__.py index e335197ea2..0081c43cb0 100644 --- a/pydra/tasks/common/__init__.py +++ b/pydra/tasks/common/__init__.py @@ -5,5 +5,5 @@ @python.define def LoadJson(file: Json) -> dict | list: - with open(file.path) as file: - return json.load(file) + with open(file) as f: + return json.load(f) From e193bb8a80472a4198655afc42ed91b94a5847ae Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 24 Dec 2024 13:27:28 +1100 Subject: [PATCH 086/342] reworking testing tasks --- .github/workflows/docs.yml | 66 ++++++++++++++++++++++++++++ docs/source/tutorial/execution.ipynb | 21 ++++----- pydra/design/python.py | 5 +++ 3 files changed, 80 insertions(+), 12 deletions(-) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000..a1ce2ebaff --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,66 @@ +# This workflows will upload a Python Package using Twine when a release is created +# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries + +name: Build docs + +on: + push: + branches: + - master + pull_request: + # release: + # types: [published] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install 
Pandoc + run: | + sudo apt-get update + sudo apt-get install -y pandoc + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build docs + run: | + cd docs + make html + twine upload dist/* + - uses: actions/upload-artifact@v3 + with: + name: built-docs + path: docs/build/html + + deploy: + needs: [build] + runs-on: ubuntu-latest + steps: + - name: Download built docs + uses: actions/download-artifact@v3 + with: + name: built-docs + path: docs-build + - name: Check for GHPAGES_DEPLOY_KEY token + id: deployable + if: github.event_name == 'release' + env: + GHPAGES_DEPLOY_KEY: "${{ secrets.GHPAGES_DEPLOY_KEY }}" + run: if [ -n "$GHPAGES_DEPLOY_KEY" ]; then echo "DEPLOY=true" >> $GITHUB_OUTPUT; fi + - name: Deploy Docs to GitHub Pages + if: steps.deployable.outputs.DEPLOY + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GHPAGES_DEPLOY_KEY }} + publish_dir: docs-build diff --git a/docs/source/tutorial/execution.ipynb b/docs/source/tutorial/execution.ipynb index 441e1d890d..24935a0a52 100644 --- a/docs/source/tutorial/execution.ipynb +++ b/docs/source/tutorial/execution.ipynb @@ -12,21 +12,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev71+g0dc7ec60.d20241216\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]})\n" + "Sample JSON file created at '0UAqFzWsDK4FrUMp48Y3tT3Q.json' with contents: {\"a\": true, \"b\": \"two\", \"c\": 3, \"d\": [7, 0.5598136790149003, 6]}\n", + "Loaded contents: {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n" ] } ], @@ -37,14 +31,17 @@ "from fileformats.application import Json\n", "from pydra.tasks.common import LoadJson\n", "\n", "# Create a sample JSON file to test\n", "json_file = Json.sample()\n", "\n", - "# Parameterise the task to load the JSON file\n", + "# Print the path of the sample JSON file and its contents for reference\n", + "print(f\"Sample JSON file created at {json_file.name!r} with contents: {json_file.read_text()}\")\n", + "\n", + "# Parameterise the task specification to load the JSON file\n", "load_json = LoadJson(file=json_file)\n", "\n", - "# Run the task\n", + "# Run the task to load the JSON file\n", "result = load_json()\n", "\n", "# Print the output interface of the of the task (LoadJson.Outputs)\n", - "print(result.output)" + "print(f\"Loaded contents: {result.output.out}\")" ] }, { diff --git a/pydra/design/python.py b/pydra/design/python.py index abeecf30b2..625b882367 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -115,6 +115,11 @@ def define( The outputs of the function or class. auto_attribs : bool Whether to use auto_attribs mode when creating the class.
+ + Returns + ------- + PythonSpec + The task specification class for the Python function """ from pydra.engine.task import PythonTask from pydra.engine.specs import PythonSpec, PythonOutputs From 53f28bd9d4613af22a9e967107ebc78c43b590f9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 24 Dec 2024 21:38:17 +1100 Subject: [PATCH 087/342] Added shell-task tutorial --- docs/source/tutorial/shell.ipynb | 356 ++++++++++++++++++++++++++++++- pydra/design/tests/test_shell.py | 82 ++++--- tutorial/README.md | 5 - 3 files changed, 394 insertions(+), 49 deletions(-) delete mode 100644 tutorial/README.md diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index 1f280a152f..5d949df16d 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -4,7 +4,345 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Shell task design" + "# Shell-task design" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Command-line template\n", + "\n", + "Define a shell-task specification using a command template string. Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. Outputs are differentiated by the `out|` prefix." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[outarg(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file'), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n" + ] + }, + { + "ename": "TypeError", + "evalue": "cp.__init__() got an unexpected keyword argument 'in_file'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(list_fields(Cp))\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Parameterise the task spec\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m cp \u001b[38;5;241m=\u001b[39m \u001b[43mCp\u001b[49m\u001b[43m(\u001b[49m\u001b[43min_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./out.txt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Print the cmdline to be run to double check\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(cp\u001b[38;5;241m.\u001b[39mcmdline)\n", + "\u001b[0;31mTypeError\u001b[0m: cp.__init__() got an unexpected keyword argument 'in_file'" + ] + } + ], + "source": [ + "from pydra.design import shell\n", + "from pydra.engine.helpers import list_fields\n", + "\n", + "test_file = \"./in.txt\"\n", + "with open(test_file, \"w\") as f:\n", + " f.write(\"this is a test file\\n\")\n", + "\n", + "# Define the shell-command task specification\n", + "Cp = shell.define(\"cp \")\n", + "\n", + "# Parameterise the task spec\n", + "cp = Cp(in_file=test_file, out_file=\"./out.txt\")\n", + "\n", + "# Print the cmdline to be run to double check\n", + "print(cp.cmdline)\n", + "\n", + "# Run the shell-comand task\n", + "cp()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If paths to output files are not provided in the parameterisation, it will default to the name of the field" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cp = Cp(in_file=test_file)\n", + "print(cp.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", + "\n", + "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.image import Png\n", + "\n", + "TrimPng = shell.define(\"trim-png \")\n", + "\n", + "trim_png = TrimPng(in_image=Png.mock())\n", + "\n", + "print(trim_png.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Adding options\n", + "\n", + "Command line flags can also be added to the shell template, either the single or double hyphen form. 
The field template name immediately following the flag will be associated with that flag.\n", "\n", "If there is no space between the flag and the field template, then the field is assumed to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", "\n", "If a field is optional, the field template should end with a `?`. Tuple fields are specified by comma separated types.\n", "\n", "Varargs are specified by the type followed by an ellipsis, e.g. ``" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "Cp = shell.define(\n", " (\n", " \"cp \"\n", " \"-R \"\n", " \"--text-arg \"\n", " \"--int-arg \"\n", " \"--tuple-arg \"\n", " ),\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Specifying defaults\n", "\n", "Defaults can be specified by appending them to the field template after `=`" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'--int-arg' default: 99\n" ] } ], "source": [ "Cp = shell.define(\n", " (\n", " \"cp \"\n", " \"-R \"\n", " \"--text-arg \"\n", " \"--int-arg \"\n", " \"--tuple-arg \"\n", " ),\n", " )\n", "\n", "fields = {f.name: f for f in list_fields(Cp)}\n", "print(f\"'--int-arg' default: {fields['int_arg'].default}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Specifying other field attributes\n", "\n", "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "Cp = shell.define(\n", " (\n", " \"cp \"\n", " \"-R \"\n", " \"--text-arg \"\n", " \"--int-arg \"\n", " \"--tuple-arg \"\n", " ),\n", " inputs={\"recursive\": shell.arg(\n", " help_string=(\n", " \"If source_file designates a directory, cp copies the directory and \"\n", " \"the entire subtree connected at that point.\"\n", " )\n", " )},\n", " outputs={\n", " \"out_dir\": shell.outarg(position=-2),\n", " \"out_file\": shell.outarg(position=-1),\n", " },\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Callable outputs\n", "\n", "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "from fileformats.generic import File\n", "\n", "\n", "def get_file_size(out_file: Path) -> int:\n", " result = os.stat(out_file)\n", " return result.st_size\n", "\n", "\n", "ACommand = shell.define(\n", " name=\"a-command \",\n", " outputs=[\n", " shell.out(\n", " name=\"out_file_size\",\n", " type=int,\n", " help_string=\"size of the output directory\",\n", " callable=get_file_size,\n", " )\n", " ],\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Dataclass form\n", "\n", "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using
`shell.define` as a decorator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.generic import FsObject, Directory\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "@shell.define\n", + "class Cp:\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = False\n", + " text_arg: str\n", + " int_arg: int | None = None\n", + " tuple_arg: tuple[int, str] | None = None\n", + "\n", + " class Outputs:\n", + " out_dir: Directory " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or alternatively in its canonical form, which is preferred when developing tool-packages as it will be type-checkable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@shell.define\n", + "class Cp(shell.Spec[\"Cp.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject] = shell.arg()\n", + " recursive: bool = shell.arg(default=False)\n", + " text_arg: str = shell.arg()\n", + " int_arg: int | None = shell.arg(default=None)\n", + " tuple_arg: tuple[int, str] | None = shell.arg(default=None)\n", + "\n", + " @shell.outputs\n", + " class Outputs(shell.Outputs):\n", + " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dynamic form\n", + "\n", + "In some cases, it is required to generate the specification for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ACommand = shell.define(\n", + " name=\"a-command\",\n", + " inputs={\n", + " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-1)\n", + " },\n", + " outputs={\n", + " \"out_file\": shell.outarg(\n", + " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", + " ),\n", + " \"out_file_size\": {\n", + " \"type\": int,\n", + " \"help_string\": \"size of the output directory\",\n", + " \"callable\": get_file_size,\n", + " }\n", + " },\n", + " )" ] }, { @@ -14,8 +352,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index e25d2c7a5d..771bcba995 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -20,16 +20,16 @@ def test_interface_template(): - SampleInterface = shell.define("cp ") + Cp = shell.define("cp ") - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(Cp, ShellSpec) output = shell.outarg( name="out_path", path_template="out_path", type=FsObject, position=2, ) - assert sorted_fields(SampleInterface) == [ + assert sorted_fields(Cp) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -41,7 +41,7 @@ def test_interface_template(): shell.arg(name="in_path", type=FsObject, position=1), output, ] - assert sorted_fields(SampleInterface.Outputs) 
== [ + assert sorted_fields(Cp.Outputs) == [ output, shell.out( name="return_code", @@ -59,26 +59,24 @@ def test_interface_template(): help_string=STDOUT_HELP, ), ] - intf = SampleInterface(in_path=File.mock("in-path.txt")) + intf = Cp(in_path=File.mock("in-path.txt")) assert intf.executable == "cp" - SampleInterface(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) - SampleInterface.Outputs(out_path=File.mock("in-path.txt")) + Cp(in_path=File.mock("in-path.txt"), out_path=Path("./out-path.txt")) + Cp.Outputs(out_path=File.mock("in-path.txt")) def test_interface_template_w_types_and_path_template_ext(): - SampleInterface = shell.define( - "trim-png " - ) + TrimPng = shell.define("trim-png ") - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(TrimPng, ShellSpec) output = shell.outarg( name="out_image", path_template="out_image.png", type=image.Png, position=2, ) - assert sorted_fields(SampleInterface) == [ + assert sorted_fields(TrimPng) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -90,7 +88,7 @@ def test_interface_template_w_types_and_path_template_ext(): shell.arg(name="in_image", type=image.Png, position=1), output, ] - assert sorted_fields(SampleInterface.Outputs) == [ + assert sorted_fields(TrimPng.Outputs) == [ output, shell.out( name="return_code", @@ -108,17 +106,17 @@ def test_interface_template_w_types_and_path_template_ext(): help_string=STDOUT_HELP, ), ] - SampleInterface(in_image=image.Png.mock()) - SampleInterface(in_image=image.Png.mock(), out_image=Path("./new_image.png")) - SampleInterface.Outputs(out_image=image.Png.mock()) + TrimPng(in_image=image.Png.mock()) + TrimPng(in_image=image.Png.mock(), out_image=Path("./new_image.png")) + TrimPng.Outputs(out_image=image.Png.mock()) def test_interface_template_w_modify(): - SampleInterface = shell.define("trim-png ") + TrimPng = shell.define("trim-png ") - assert issubclass(SampleInterface, ShellSpec) - assert sorted_fields(SampleInterface) == [ + assert issubclass(TrimPng, ShellSpec) + assert sorted_fields(TrimPng) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -131,7 +129,7 @@ def test_interface_template_w_modify(): name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy ), ] - assert sorted_fields(SampleInterface.Outputs) == [ + assert sorted_fields(TrimPng.Outputs) == [ shell.out( name="image", type=image.Png, @@ -153,13 +151,13 @@ def test_interface_template_w_modify(): help_string=STDOUT_HELP, ), ] - SampleInterface(image=image.Png.mock()) - SampleInterface.Outputs(image=image.Png.mock()) + TrimPng(image=image.Png.mock()) + TrimPng.Outputs(image=image.Png.mock()) def test_interface_template_more_complex(): - SampleInterface = shell.define( + Cp = shell.define( ( "cp " "-R " @@ -169,14 +167,14 @@ def test_interface_template_more_complex(): ), ) - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(Cp, ShellSpec) output = shell.outarg( name="out_dir", type=Directory, path_template="out_dir", position=2, ) - assert sorted_fields(SampleInterface) == [ + assert sorted_fields(Cp) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -212,7 +210,7 @@ def test_interface_template_more_complex(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [ + assert sorted_fields(Cp.Outputs) == [ output, shell.out( name="return_code", @@ -230,8 +228,8 @@ def test_interface_template_more_complex(): help_string=STDOUT_HELP, ), ] - SampleInterface(in_fs_objects=[File.sample(), 
File.sample(seed=1)]) - SampleInterface.Outputs(out_dir=Directory.sample()) + Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) + Cp.Outputs(out_dir=Directory.sample()) def test_interface_template_with_overrides_and_optionals(): @@ -241,7 +239,7 @@ def test_interface_template_with_overrides_and_optionals(): "subtree connected at that point." ) - SampleInterface = shell.define( + Cp = shell.define( ( "cp " "-R " @@ -256,7 +254,7 @@ def test_interface_template_with_overrides_and_optionals(): }, ) - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(Cp, ShellSpec) outargs = [ shell.outarg( name="out_dir", @@ -273,7 +271,7 @@ def test_interface_template_with_overrides_and_optionals(): ), ] assert ( - sorted_fields(SampleInterface) + sorted_fields(Cp) == [ shell.arg( name="executable", @@ -311,7 +309,7 @@ def test_interface_template_with_overrides_and_optionals(): ] + outargs ) - assert sorted_fields(SampleInterface.Outputs) == outargs + [ + assert sorted_fields(Cp.Outputs) == outargs + [ shell.out( name="return_code", type=int, @@ -332,7 +330,7 @@ def test_interface_template_with_overrides_and_optionals(): def test_interface_template_with_defaults(): - SampleInterface = shell.define( + Cp = shell.define( ( "cp " "-R " @@ -342,14 +340,14 @@ def test_interface_template_with_defaults(): ), ) - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(Cp, ShellSpec) output = shell.outarg( name="out_dir", type=Directory, path_template="out_dir", position=2, ) - assert sorted_fields(SampleInterface) == [ + assert sorted_fields(Cp) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -375,7 +373,7 @@ def test_interface_template_with_defaults(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [ + assert sorted_fields(Cp.Outputs) == [ output, shell.out( name="return_code", @@ -393,13 +391,13 @@ def test_interface_template_with_defaults(): help_string=STDOUT_HELP, ), ] - SampleInterface(in_fs_objects=[File.sample(), File.sample(seed=1)]) - SampleInterface.Outputs(out_dir=Directory.sample()) + Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) + Cp.Outputs(out_dir=Directory.sample()) def test_interface_template_with_type_overrides(): - SampleInterface = shell.define( + Cp = shell.define( ( "cp " "-R " @@ -410,14 +408,14 @@ def test_interface_template_with_type_overrides(): inputs={"text_arg": str, "int_arg": int | None}, ) - assert issubclass(SampleInterface, ShellSpec) + assert issubclass(Cp, ShellSpec) output = shell.outarg( name="out_dir", type=Directory, path_template="out_dir", position=2, ) - assert sorted_fields(SampleInterface) == [ + assert sorted_fields(Cp) == [ shell.arg( name="executable", validator=attrs.validators.min_len(1), @@ -445,7 +443,7 @@ def test_interface_template_with_type_overrides(): position=6, ), ] - assert sorted_fields(SampleInterface.Outputs) == [ + assert sorted_fields(Cp.Outputs) == [ output, shell.out( name="return_code", diff --git a/tutorial/README.md b/tutorial/README.md deleted file mode 100644 index 4df55ac5a0..0000000000 --- a/tutorial/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Pydra Tutorial - -Python Tutorial has been moved to a separate [GitHub repository](https://github.com/nipype/pydra-tutorial). 
- -The interactive tutorial is available at [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/master?filepath=notebooks) From 75d429bace5060c4480833b27719f0b03b9b521a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 26 Dec 2024 11:35:01 +1100 Subject: [PATCH 088/342] fixed up templating defaults --- docs/source/tutorial/shell.ipynb | 74 +++++++++++++++++++++----------- pydra/design/shell.py | 11 ++++- pydra/engine/helpers_file.py | 49 +++++++++++++++------ pydra/engine/specs.py | 18 ++++++-- 4 files changed, 108 insertions(+), 44 deletions(-) diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index 5d949df16d..dd34664e2f 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -11,54 +11,70 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Command-line template\n", + "## Command-line templates\n", "\n", - "Define a shell-task specification using a command template string. Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. Outputs are differentiated by the `out|` prefix." + "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "\n", + "Cp = shell.define(\"cp \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. Outputs are differentiated by the `out|` prefix.\n", + "\n", + "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[outarg(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file'), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n" - ] - }, - { - "ename": "TypeError", - "evalue": "cp.__init__() got an unexpected keyword argument 'in_file'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(list_fields(Cp))\n\u001b[1;32m 12\u001b[0m \u001b[38;5;66;03m# Parameterise the task spec\u001b[39;00m\n\u001b[0;32m---> 13\u001b[0m cp \u001b[38;5;241m=\u001b[39m \u001b[43mCp\u001b[49m\u001b[43m(\u001b[49m\u001b[43min_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_file\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_file\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m./out.txt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# Print the cmdline to be run to double check\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(cp\u001b[38;5;241m.\u001b[39mcmdline)\n", - "\u001b[0;31mTypeError\u001b[0m: cp.__init__() got an unexpected keyword argument 'in_file'" + "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt\n", + "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt'): 'Contents to be copied'\n" ] } ], "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", "from pydra.design import shell\n", "from pydra.engine.helpers import list_fields\n", "\n", - "test_file = \"./in.txt\"\n", + "# Make a test file to copy\n", + "test_dir = Path(mkdtemp())\n", + "test_file = test_dir / \"in.txt\"\n", "with open(test_file, \"w\") as f:\n", - " f.write(\"this is a test file\\n\")\n", - "\n", - "# Define the shell-command task specification\n", - "Cp = shell.define(\"cp \")\n", + " f.write(\"Contents to be copied\")\n", "\n", "# Parameterise the task spec\n", - "cp = Cp(in_file=test_file, out_file=\"./out.txt\")\n", + "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", "\n", "# Print the cmdline to be run to double check\n", - "print(cp.cmdline)\n", + "print(f\"Command-line to be run: {cp.cmdline}\")\n", "\n", "# Run the shell-comand task\n", - "cp()" + "result = cp()\n", + "\n", + "print(\n", + " f\"Contents of copied file ('{result.output.destination}'): \"\n", + " f\"'{Path(result.output.destination).read_text()}'\"\n", + ")" ] }, { @@ -70,9 +86,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt True\n" + ] + } + ], "source": [ "cp = Cp(in_file=test_file)\n", "print(cp.cmdline)" diff --git a/pydra/design/shell.py b/pydra/design/shell.py index b654acd77b..544bc30ff7 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -189,6 +189,7 @@ class outarg(Out, arg): """ path_template: str | None = attrs.field(default=None) + keep_extension: bool = attrs.field(default=False) 
@path_template.validator def _validate_path_template(self, attribute, value): @@ -198,6 +199,14 @@ def _validate_path_template(self, attribute, value): f"({self.default!r}) is provided" ) + @keep_extension.validator + def _validate_keep_extension(self, attribute, value): + if value and self.path_template is not None: + raise ValueError( + f"keep_extension ({value!r}) can only be provided when path_template " + f"is provided" + ) + @dataclass_transform( kw_only_default=True, @@ -465,7 +474,7 @@ def parse_command_line_template( outputs = {} parts = template.split() executable = [] - for i, part in enumerate(parts, start=1): + for i, part in enumerate(parts): if part.startswith("<") or part.startswith("-"): break executable.append(part) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 339ae2ba62..7b1e127ad2 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -10,7 +10,7 @@ from contextlib import contextmanager import attr from fileformats.core import FileSet -from pydra.engine.helpers import is_lazy, attrs_values +from pydra.engine.helpers import is_lazy, attrs_values, list_fields logger = logging.getLogger("pydra") @@ -114,17 +114,18 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): k = k.split(".")[1] inputs_dict_st[k] = inputs_dict_st[k][v] - from .specs import attrs_fields + from pydra.design import shell # Collect templated inputs for which all requirements are satisfied. fields_templ = [ field - for field in attrs_fields(inputs) - if field.metadata.get("output_file_template") + for field in list_fields(inputs) + if isinstance(field, shell.outarg) + and field.path_template and getattr(inputs, field.name) is not False and all( - getattr(inputs, required_field) is not attr.NOTHING - for required_field in field.metadata.get("requires", ()) + getattr(inputs, required_field) is not None + for required_field in field.requires ) ] @@ -151,8 +152,7 @@ def template_update_single( """ # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object - from pydra.utils.typing import TypeParser # noqa - from pydra.engine.specs import OUTPUT_TEMPLATE_TYPES + from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa if inputs_dict_st is None: inputs_dict_st = attrs_values(inputs) @@ -200,9 +200,23 @@ def _template_formatting(field, inputs, inputs_dict_st): returning a list of formatted templates in that case. Allowing for multiple input values used in the template as longs as there is no more than one file (i.e. 
File, PathLike or string with extensions) + + Parameters + ---------- + field : pydra.engine.helpers.Field + field with a template + inputs : pydra.engine.helpers.Input + inputs object + inputs_dict_st : dict + dictionary with values from inputs object + + Returns + ------- + formatted : str or list + formatted template """ # if a template is a function it has to be run first with the inputs as the only arg - template = field.metadata["output_file_template"] + template = field.path_template if callable(template): template = template(inputs) @@ -219,9 +233,8 @@ def _template_formatting(field, inputs, inputs_dict_st): def _string_template_formatting(field, template, inputs, inputs_dict_st): - from .specs import MultiInputObj, MultiOutputFile + from pydra.utils.typing import MultiInputObj, MultiOutputFile - keep_extension = field.metadata.get("keep_extension", True) inp_fields = re.findall(r"{\w+}", template) inp_fields_fl = re.findall(r"{\w+:[0-9.]+f}", template) inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_fl] @@ -281,17 +294,25 @@ def _string_template_formatting(field, template, inputs, inputs_dict_st): formatted_value.append( _element_formatting( - template, val_dict_el, file_template, keep_extension=keep_extension + template, + val_dict_el, + file_template, + keep_extension=field.keep_extension, ) ) else: formatted_value = _element_formatting( - template, val_dict, file_template, keep_extension=keep_extension + template, val_dict, file_template, keep_extension=field.keep_extension ) return formatted_value -def _element_formatting(template, values_template_dict, file_template, keep_extension): +def _element_formatting( + template: str, + values_template_dict: dict[str, ty.Any], + file_template: str, + keep_extension: bool, +): """Formatting a single template for a single element (if a list). 
Taking into account that a file used in the template (file_template) and the template itself could have file extensions diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 22331afcc9..e18cbaada7 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -494,7 +494,7 @@ def from_task( ) # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - if isinstance(fld, shell.outarg) and is_set(getattr(task.inputs, fld.name)): + if isinstance(fld, shell.outarg) and is_set(getattr(task.spec, fld.name)): resolved_value = getattr(task.spec, fld.name) elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) @@ -691,10 +691,20 @@ def _command_args( else: if name in modified_inputs: pos_val = self._command_pos_args( - field, value, output_dir, root=root + field=field, + value=value, + inputs=inputs, + root=root, + output_dir=output_dir, ) else: - pos_val = self._command_pos_args(field, value, output_dir, inputs) + pos_val = self._command_pos_args( + field=field, + value=value, + output_dir=output_dir, + inputs=inputs, + root=root, + ) if pos_val: pos_args.append(pos_val) @@ -755,7 +765,7 @@ def _command_pos_args( # Shift negatives down to allow args to be -1 field.position += 1 if field.position >= 0 else -1 - if value: + if value and isinstance(value, str): if root: # values from templates value = value.replace(str(output_dir), f"{root}{output_dir}") From 92fe97c33087b0743f91bf2d7af6b23a20faa41a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 10:13:38 +1100 Subject: [PATCH 089/342] debugged shell and python task tutorias --- docs/source/tutorial/shell.ipynb | 265 +++++++++++++++++------- docs/source/tutorial/task.ipynb | 61 +++--- pydra/design/base.py | 22 +- pydra/design/shell.py | 9 +- pydra/engine/core.py | 68 ++---- pydra/engine/helpers.py | 40 +--- pydra/engine/helpers_file.py | 56 ++--- pydra/engine/specs.py | 148 +++++-------- pydra/engine/task.py | 79 ++++++- pydra/engine/tests/test_helpers_file.py | 2 +- 10 files changed, 433 insertions(+), 317 deletions(-) diff --git a/docs/source/tutorial/shell.ipynb b/docs/source/tutorial/shell.ipynb index dd34664e2f..ecc6504156 100644 --- a/docs/source/tutorial/shell.ipynb +++ b/docs/source/tutorial/shell.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -38,23 +38,21 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt\n", - "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/out.txt'): 'Contents to be copied'\n" + "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt\n", + "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt'): 'Contents to be copied'\n" ] } ], "source": [ "from pathlib import Path\n", "from tempfile import mkdtemp\n", - "from pydra.design import shell\n", - "from pydra.engine.helpers import list_fields\n", "\n", "# Make a test file to copy\n", "test_dir = Path(mkdtemp())\n", @@ -86,14 +84,14 @@ }, { "cell_type": "code", - "execution_count": 
10, + "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnw4kzvv0/in.txt True\n" + "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /Users/tclose/git/workflows/pydra/docs/source/tutorial/destination\n" ] } ], @@ -106,6 +104,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "### Specifying types\n", + "\n", "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", "\n", "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" @@ -113,15 +113,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trim-png /mock/png.png /path/to/output.png\n" + ] + } + ], "source": [ "from fileformats.image import Png\n", "\n", "TrimPng = shell.define(\"trim-png \")\n", "\n", - "trim_png = TrimPng(in_image=Png.mock())\n", + "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", "\n", "print(trim_png.cmdline)" ] @@ -135,7 +143,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Adding options\n", + "### Flags and options\n", "\n", "Command line flags can also be added to the shell template, either the single or double hyphen form. The field template name immediately following the flag will be associate with that flag.\n", "\n", @@ -148,10 +156,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=str | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", "Cp = shell.define(\n", " (\n", " \"cp \"\n", @@ -160,21 +189,24 @@ " \"--int-arg \"\n", " \"--tuple-arg \"\n", " ),\n", - " )" + " )\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Specifying defaults\n", + "### Defaults\n", "\n", "Defaults can be specified by appending them to the field 
template after `=`" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -196,24 +228,43 @@ " ),\n", " )\n", "\n", - "fields = {f.name: f for f in list_fields(Cp)}\n", - "print(f\"'--int-arg' default: {fields['int_arg'].default}\")" + "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Specifying other field attributes\n", + "### Additional field attributes\n", "\n", "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, 
copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], "source": [ "Cp = shell.define(\n", " (\n", @@ -233,45 +284,76 @@ " \"out_dir\": shell.outarg(position=-2),\n", " \"out_file\": shell.outarg(position=-1),\n", " },\n", - " )" + " )\n", + "\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Callable outptus\n", + "### Callable outptus\n", "\n", - "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value" + "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. 
Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Size of the output file is: 256\n" + ] + } + ], "source": [ "import os\n", + "from pydra.design import shell\n", "from pathlib import Path\n", "from fileformats.generic import File\n", "\n", - "\n", + "# Arguments to the callable function can be one of \n", "def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", " result = os.stat(out_file)\n", " return result.st_size\n", "\n", "\n", - "ACommand = shell.define(\n", - " name=\"a-command \",\n", - " outputs=[\n", - " shell.out(\n", - " name=\"out_file_size\",\n", - " type=int,\n", - " help_string=\"size of the output directory\",\n", - " callable=get_file_size,\n", - " )\n", - " ],\n", - ")" + "CpWithSize = shell.define(\n", + " \"cp \",\n", + " outputs={\"out_file_size\": get_file_size},\n", + ")\n", + "\n", + "# Parameterise the task spec\n", + "cp_with_size = CpWithSize(in_file=File.sample())\n", + "\n", + "# Run the command\n", + "result = cp_with_size()\n", + "\n", + "\n", + "print(f\"Size of the output file is: {result.output.out_file_size}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The callable can take any combination of the following arguments, which will be passed\n", + "to it when it is called\n", + "\n", + "* field: the `Field` object to be provided a value, useful when writing generic callables\n", + "* output_dir: a `Path` object referencing the working directory the command was run within\n", + "* inputs: a dictionary containing all the resolved inputs to the task\n", + "* stdout: the standard output stream produced by the command\n", + "* stderr: the standard error stream produced by the command\n", + "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" ] }, { @@ -280,48 +362,75 @@ "source": [ "## Dataclass form\n", "\n", - "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using `shell.define` as a decorator" + "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using `shell.define` as a decorator." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_file': out(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=None),\n", + " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], "source": [ "from fileformats.generic import FsObject, Directory\n", "from pydra.utils.typing import MultiInputObj\n", "\n", "@shell.define\n", - "class Cp:\n", + "class CpWithSize:\n", "\n", " executable = \"cp\"\n", "\n", " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = False\n", - " text_arg: str\n", - " int_arg: int | None = None\n", - " tuple_arg: tuple[int, str] | None = None\n", + " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\", default=None)\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\", default=None)\n", "\n", " class Outputs:\n", - " out_dir: Directory " + " out_file: File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", 
+ "pprint(fields_dict(CpWithSize.Outputs))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Or alternatively in its canonical form, which is preferred when developing tool-packages as it will be type-checkable" + "To make workflows that use the interface type-checkable, the canonical form of a shell\n", + "task dataclass should inherit from `shell.Spec` parameterized by its nested Outputs class,\n", + "and the `Outputs` nested class should inherit from `shell.Outputs`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ + "from pydra.engine.specs import ShellSpec, ShellOutputs\n", + "\n", "@shell.define\n", - "class Cp(shell.Spec[\"Cp.Outputs\"]):\n", + "class Cp(ShellSpec[\"Cp.Outputs\"]):\n", "\n", " executable = \"cp\"\n", "\n", @@ -332,7 +441,7 @@ " tuple_arg: tuple[int, str] | None = shell.arg(default=None)\n", "\n", " @shell.outputs\n", - " class Outputs(shell.Outputs):\n", + " class Outputs(ShellOutputs):\n", " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" ] }, @@ -340,33 +449,49 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Dynamic form\n", + "## Dynamic specifications\n", "\n", "In some cases, it is required to generate the specification for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", + "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help_string='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" + ] + } + ], "source": [ + "from fileformats.generic import File\n", + "from pydra.engine.helpers import list_fields\n", + "\n", "ACommand = shell.define(\n", - " name=\"a-command\",\n", - " inputs={\n", - " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-1)\n", - " },\n", - " outputs={\n", - " \"out_file\": shell.outarg(\n", - " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", - " ),\n", - " \"out_file_size\": {\n", - " \"type\": int,\n", - " \"help_string\": \"size of the output directory\",\n", - " \"callable\": get_file_size,\n", - " }\n", - " },\n", - " )" + " \"a-command\",\n", + " inputs={\n", + " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-2)\n", + " },\n", + " outputs={\n", + " \"out_file\": shell.outarg(\n", + " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", + " ),\n", + " \"out_file_size\": {\n", + " \"type\": int,\n", + " \"help_string\": \"size of the output directory\",\n", + " \"callable\": get_file_size,\n", + " }\n", + " },\n", + ")\n", + "\n", + "\n", + "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", + "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" ] }, { diff --git a/docs/source/tutorial/task.ipynb b/docs/source/tutorial/task.ipynb index f771ee618f..cae0ad88cb 100644 --- a/docs/source/tutorial/task.ipynb +++ b/docs/source/tutorial/task.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -113,24 +113,24 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], 
converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", - "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" ] } ], "source": [ "from pprint import pprint\n", - "from pydra.engine.helpers import list_fields\n", + "from pydra.engine.helpers import fields_dict\n", "\n", "@python.define(outputs=[\"c\", \"d\"])\n", "def SampleSpec(a: int, b: float) -> tuple[float, float]:\n", @@ -147,8 +147,8 @@ " \"\"\"\n", " return a + b, a * b\n", "\n", - "pprint(list_fields(SampleSpec))\n", - "pprint(list_fields(SampleSpec.Outputs))" + "pprint(fields_dict(SampleSpec))\n", + "pprint(fields_dict(SampleSpec.Outputs))" ] }, { @@ -160,18 +160,18 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", - "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', 
type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" ] } ], @@ -204,8 +204,8 @@ " def function(a, b):\n", " return a + b, a * b\n", "\n", - "pprint(list_fields(SampleSpec))\n", - "pprint(list_fields(SampleSpec.Outputs))" + "pprint(fields_dict(SampleSpec))\n", + "pprint(fields_dict(SampleSpec.Outputs))" ] }, { @@ -217,23 +217,23 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)]\n", - "[out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)]\n" + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" ] } ], "source": [ - "from pydra.engine.specs import PythonSpec\n", + "from pydra.engine.specs import PythonSpec, PythonOutputs\n", "\n", "@python.define\n", "class SampleSpec(PythonSpec[\"SampleSpec.Outputs\"]):\n", @@ -248,7 +248,8 @@ " a: int\n", " b: float\n", "\n", - " class Outputs:\n", + " @python.outputs\n", + " class Outputs(PythonOutputs):\n", " \"\"\"\n", " Args:\n", " c: Sum of a and b\n", @@ -262,8 +263,8 @@ " def function(a, b):\n", " return a + b, a * b\n", "\n", - "pprint(list_fields(SampleSpec))\n", - "pprint(list_fields(SampleSpec.Outputs))" + "pprint(fields_dict(SampleSpec))\n", + "pprint(fields_dict(SampleSpec.Outputs))" ] }, { diff --git a/pydra/design/base.py b/pydra/design/base.py index 996fface4e..e27a1bb0ba 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -618,12 +618,32 @@ def ensure_field_objects( out.name = 
output_name if not out.help_string: out.help_string = output_helps.get(output_name, "") - else: + elif inspect.isclass(out): outputs[output_name] = out_type( type=out, name=output_name, help_string=output_helps.get(output_name, ""), ) + elif isinstance(out, dict): + out_kwds = copy(out) + if "help_string" not in out_kwds: + out_kwds["help_string"] = output_helps.get(output_name, "") + outputs[output_name] = out_type( + name=output_name, + **out_kwds, + ) + elif isinstance(out, ty.Callable) and hasattr(out_type, "callable"): + outputs[output_name] = out_type( + name=output_name, + type=ty.get_type_hints(out).get("return", ty.Any), + callable=out, + help_string=re.split(r"\n\s*\n", out.__doc__)[0] if out.__doc__ else "", + ) + else: + raise ValueError( + f"Unrecognised value provided to outputs ({arg}), can be either {out_type} " + "type" + (" or callable" if hasattr(out_type, "callable") else "") + ) return inputs, outputs diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 544bc30ff7..419279383f 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -128,6 +128,11 @@ class out(Out): callable: ty.Callable | None = None + def __attrs_post_init__(self): + # Set type from return annotation of callable if not set + if self.type is ty.Any and self.callable: + self.type = ty.get_type_hints(self.callable).get("return", ty.Any) + @attrs.define(kw_only=True) class outarg(Out, arg): @@ -474,7 +479,7 @@ def parse_command_line_template( outputs = {} parts = template.split() executable = [] - for i, part in enumerate(parts): + for i, part in enumerate(parts, start=1): if part.startswith("<") or part.startswith("-"): break executable.append(part) @@ -484,7 +489,7 @@ def parse_command_line_template( executable = executable[0] if i == len(parts): return executable, inputs, outputs - args_str = " ".join(parts[i:]) + args_str = " ".join(parts[i - 1 :]) tokens = re.split(r"\s+", args_str.strip()) arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 40c059d076..780a2d9976 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -34,7 +34,7 @@ ensure_list, record_error, PydraFileLock, - parse_copyfile, + list_fields, is_lazy, ) from pydra.utils.hash import hash_function @@ -80,6 +80,8 @@ class Task: name: str spec: TaskSpec + _inputs: dict[str, ty.Any] | None = None + def __init__( self, spec, @@ -264,27 +266,7 @@ def output_names(self): @property def generated_output_names(self): - """Get the names of the outputs generated by the task. - If the spec doesn't have generated_output_names method, - it uses output_names. 
- The results depends on the input provided to the task - """ - output_klass = self.spec.Outputs - if hasattr(output_klass, "_generated_output_names"): - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - # using updated input (after filing the templates) - _inputs = deepcopy(self.spec) - modified_inputs = template_update(_inputs, self.output_dir) - if modified_inputs: - _inputs = attr.evolve(_inputs, **modified_inputs) - - return output._generated_output_names( - inputs=_inputs, output_dir=self.output_dir - ) - else: - return self.output_names + return self.output_names @property def can_resume(self): @@ -372,8 +354,10 @@ def __call__( res = self._run(rerun=rerun, environment=environment, **kwargs) return res - def _modify_inputs(self): - """This method modifies the inputs of the task ahead of its execution: + @property + def inputs(self) -> dict[str, ty.Any]: + """Resolve any template inputs of the task ahead of its execution: + - links/copies upstream files and directories into the destination tasks working directory as required select state array values corresponding to state index (it will try to leave them where they are unless specified or @@ -383,46 +367,34 @@ def _modify_inputs(self): execution (they will be replaced after the task's execution with the original inputs to ensure the tasks checksums are consistent) """ + if self._inputs is not None: + return self._inputs + from pydra.utils.typing import TypeParser - orig_inputs = { + self._inputs = { k: v for k, v in attrs_values(self.spec).items() if not k.startswith("_") } map_copyfiles = {} - input_fields = attr.fields(type(self.spec)) - for name, value in orig_inputs.items(): - fld = getattr(input_fields, name) - copy_mode, copy_collation = parse_copyfile( - fld, default_collation=self.DEFAULT_COPY_COLLATION - ) + for fld in list_fields(self.spec): + name = fld.name + value = self._inputs[name] if value is not attr.NOTHING and TypeParser.contains_type( FileSet, fld.type ): copied_value = copy_nested_files( value=value, dest_dir=self.output_dir, - mode=copy_mode, - collation=copy_collation, + mode=fld.copy_mode, + collation=fld.copy_collation, supported_modes=self.SUPPORTED_COPY_MODES, ) if value is not copied_value: map_copyfiles[name] = copied_value - modified_inputs = template_update( - self.spec, self.output_dir, map_copyfiles=map_copyfiles - ) - assert all(m in orig_inputs for m in modified_inputs), ( - "Modified inputs contain fields not present in original inputs. " - "This is likely a bug." 
+ self._inputs.update( + template_update(self.spec, self.output_dir, map_copyfiles=map_copyfiles) ) - for name, orig_value in orig_inputs.items(): - try: - value = modified_inputs[name] - except KeyError: - # Ensure we pass a copy not the original just in case inner - # attributes are modified during execution - value = deepcopy(orig_value) - setattr(self.spec, name, value) - return orig_inputs + return self._inputs def _populate_filesystem(self, checksum, output_dir): """ diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index c0536be299..8b2cc6428a 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -48,6 +48,11 @@ def list_fields(spec: "type[TaskSpec] | TaskSpec") -> list["Field"]: ] +def fields_dict(spec: "type[TaskSpec] | TaskSpec") -> dict[str, "Field"]: + """Returns the fields of a spec in a dictionary""" + return {f.name: f for f in list_fields(spec)} + + # from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile @@ -529,41 +534,6 @@ async def __aexit__(self, exc_type, exc_value, traceback): return None -def parse_copyfile(fld: attrs.Attribute, default_collation=FileSet.CopyCollation.any): - """Gets the copy mode from the 'copyfile' value from a field attribute""" - copyfile = fld.metadata.get("copyfile", FileSet.CopyMode.any) - if isinstance(copyfile, tuple): - mode, collation = copyfile - elif isinstance(copyfile, str): - try: - mode, collation = copyfile.split(",") - except ValueError: - mode = copyfile - collation = default_collation - else: - collation = FileSet.CopyCollation[collation] - mode = FileSet.CopyMode[mode] - else: - if copyfile is True: - mode = FileSet.CopyMode.copy - elif copyfile is False: - mode = FileSet.CopyMode.link - elif copyfile is None: - mode = FileSet.CopyMode.any - else: - mode = copyfile - collation = default_collation - if not isinstance(mode, FileSet.CopyMode): - raise TypeError( - f"Unrecognised type for mode copyfile metadata of {fld}, {mode}" - ) - if not isinstance(collation, FileSet.CopyCollation): - raise TypeError( - f"Unrecognised type for collation copyfile metadata of {fld}, {collation}" - ) - return mode, collation - - def parse_format_string(fmtstr): """Parse a argstr format string and return all keywords used in it.""" identifier = r"[a-zA-Z_]\w*" diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 7b1e127ad2..3705fd599f 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -97,7 +97,9 @@ def copy_fileset(fileset: FileSet): # not sure if this might be useful for Function Task -def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): +def template_update( + spec, output_dir: Path, map_copyfiles: dict[str, Path] | None = None +): """ Update all templates that are present in the input spec. @@ -105,26 +107,21 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): """ - inputs_dict_st = attrs_values(inputs) + inputs_dict_st = attrs_values(spec) if map_copyfiles is not None: inputs_dict_st.update(map_copyfiles) - if state_ind is not None: - for k, v in state_ind.items(): - k = k.split(".")[1] - inputs_dict_st[k] = inputs_dict_st[k][v] - from pydra.design import shell # Collect templated inputs for which all requirements are satisfied. 
fields_templ = [ field - for field in list_fields(inputs) + for field in list_fields(spec) if isinstance(field, shell.outarg) and field.path_template - and getattr(inputs, field.name) is not False + and getattr(spec, field.name) is not False and all( - getattr(inputs, required_field) is not None + getattr(spec, required_field) is not None for required_field in field.requires ) ] @@ -133,8 +130,8 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): for fld in fields_templ: dict_mod[fld.name] = template_update_single( field=fld, - inputs=inputs, - inputs_dict_st=inputs_dict_st, + spec=spec, + input_values=inputs_dict_st, output_dir=output_dir, ) # adding elements from map_copyfiles to fields with templates @@ -144,7 +141,11 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): def template_update_single( - field, inputs, inputs_dict_st=None, output_dir=None, spec_type="input" + field, + spec, + input_values: dict[str, ty.Any] = None, + output_dir: Path | None = None, + spec_type: str = "input", ): """Update a single template from the input_spec or output_spec based on the value from inputs_dict @@ -154,11 +155,11 @@ def template_update_single( # the dictionary will be created from inputs object from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa - if inputs_dict_st is None: - inputs_dict_st = attrs_values(inputs) + if input_values is None: + input_values = attrs_values(spec) if spec_type == "input": - inp_val_set = inputs_dict_st[field.name] + inp_val_set = input_values[field.name] if isinstance(inp_val_set, bool) and field.type in (Path, str): raise TypeError( f"type of '{field.name}' is Path, consider using Union[Path, bool]" @@ -179,21 +180,21 @@ def template_update_single( return inp_val_set if inp_val_set is False: # if input fld is set to False, the fld shouldn't be used (setting NOTHING) - return attr.NOTHING + return None # inputs_dict[field.name] is True or spec_type is output - value = _template_formatting(field, inputs, inputs_dict_st) + value = _template_formatting(field, spec, input_values) # changing path so it is in the output_dir - if output_dir and value is not attr.NOTHING: + if output_dir and value is not None: # should be converted to str, it is also used for input fields that should be str if type(value) is list: return [str(output_dir / Path(val).name) for val in value] else: return str(output_dir / Path(value).name) else: - return attr.NOTHING + return None -def _template_formatting(field, inputs, inputs_dict_st): +def _template_formatting(field, spec, input_values): """Formatting the field template based on the values from inputs. 
Taking into account that the field with a template can be a MultiOutputFile and the field values needed in the template can be a list - @@ -218,21 +219,20 @@ def _template_formatting(field, inputs, inputs_dict_st): # if a template is a function it has to be run first with the inputs as the only arg template = field.path_template if callable(template): - template = template(inputs) + template = template(spec) # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): formatted = [ - _string_template_formatting(field, t, inputs, inputs_dict_st) - for t in template + _string_template_formatting(field, t, spec, input_values) for t in template ] else: assert isinstance(template, str) - formatted = _string_template_formatting(field, template, inputs, inputs_dict_st) + formatted = _string_template_formatting(field, template, spec, input_values) return formatted -def _string_template_formatting(field, template, inputs, inputs_dict_st): +def _string_template_formatting(field, template, spec, input_values): from pydra.utils.typing import MultiInputObj, MultiOutputFile inp_fields = re.findall(r"{\w+}", template) @@ -246,9 +246,9 @@ def _string_template_formatting(field, template, inputs, inputs_dict_st): for fld in inp_fields: fld_name = fld[1:-1] # extracting the name form {field_name} - if fld_name not in inputs_dict_st: + if fld_name not in input_values: raise AttributeError(f"{fld_name} is not provided in the input") - fld_value = inputs_dict_st[fld_name] + fld_value = input_values[fld_name] if fld_value is attr.NOTHING: # if value is NOTHING, nothing should be added to the command return attr.NOTHING diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index e18cbaada7..c823f1b174 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1,6 +1,5 @@ """Task I/O specifications.""" -import os from pathlib import Path import re from copy import copy @@ -16,7 +15,7 @@ import cloudpickle as cp from fileformats.generic import File from pydra.engine.audit import AuditFlag -from pydra.utils.typing import TypeParser, MultiOutputObj +from pydra.utils.typing import TypeParser from .helpers import ( attrs_fields, attrs_values, @@ -26,7 +25,7 @@ ensure_list, parse_format_string, ) -from .helpers_file import template_update_single, template_update +from .helpers_file import template_update from pydra.utils.hash import hash_function, Cache from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY from pydra.design import shell @@ -145,6 +144,28 @@ def _get_node(self): f"{self} outputs object is not a lazy output of a workflow node" ) + def __iter__(self) -> ty.Generator[str, None, None]: + """Iterate through all the names in the specification""" + return (f.name for f in list_fields(self)) + + def __getitem__(self, name: str) -> ty.Any: + """Return the value for the given attribute, resolving any templates + + Parameters + ---------- + name : str + the name of the attribute to return + + Returns + ------- + Any + the value of the attribute + """ + try: + return getattr(self, name) + except AttributeError: + raise KeyError(f"{self} doesn't have an attribute {name}") from None + OutputsType = ty.TypeVar("OutputType", bound=Outputs) @@ -214,6 +235,28 @@ def __call__( ) return task(**kwargs) + def __iter__(self) -> ty.Generator[str, None, None]: + """Iterate through all the names in the specification""" + return (f.name for f in list_fields(self)) + + def __getitem__(self, name: str) -> ty.Any: + """Return the value for the given attribute, resolving any 
templates + + Parameters + ---------- + name : str + the name of the attribute to return + + Returns + ------- + Any + the value of the attribute + """ + try: + return getattr(self, name) + except AttributeError: + raise KeyError(f"{self} doesn't have an attribute {name}") from None + @property def _hash(self): hsh, self._hashes = self._compute_hashes() @@ -472,65 +515,23 @@ def from_task( """ outputs = super().from_task(task) + fld: shell.out for fld in list_fields(cls): if fld.name in ["return_code", "stdout", "stderr"]: continue - if not TypeParser.is_subclass( - fld.type, - ( - os.PathLike, - MultiOutputObj, - int, - float, - bool, - str, - list, - ), - ): - raise TypeError( - f"Support for {fld.type} type, required for '{fld.name}' in {cls}, " - "has not been implemented in collect_additional_output" - ) # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides if isinstance(fld, shell.outarg) and is_set(getattr(task.spec, fld.name)): - resolved_value = getattr(task.spec, fld.name) + resolved_value = task.inputs[fld.name] elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) else: - if fld.type in [int, float, bool, str, list] and not fld.callable: - raise AttributeError( - f"{fld.type} has to have a callable in metadata" - ) - resolved_value = cls._generate_implicit_value( - fld, task.spec, task.output_dir, outputs.stdout, outputs.stderr - ) + resolved_value = task.resolve_value(fld, outputs.stdout, outputs.stderr) # Set the resolved value setattr(outputs, fld.name, resolved_value) return outputs - @classmethod - def _generated_output_names( - cls, inputs: "ShellSpec", output_dir: Path, stdout: str, stderr: str - ): - """Returns a list of all outputs that will be generated by the task. - Takes into account the task input and the requires list for the output fields. - TODO: should be in all Output specs? - """ - # checking the input (if all mandatory fields are provided, etc.) - inputs._check_rules() - output_names = ["return_code", "stdout", "stderr"] - for fld in list_fields(cls): - # assuming that field should have either default or metadata, but not both - if is_set(fld.default): - output_names.append(fld.name) - elif is_set( - cls._generate_implicit_value(fld, inputs, output_dir, stdout, stderr) - ): - output_names.append(fld.name) - return output_names - @classmethod def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: """Resolve path and glob expr default values relative to the output dir""" @@ -554,57 +555,6 @@ def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: raise AttributeError(f"no file matches {default.name}") return default - @classmethod - def _generate_implicit_value( - cls, - fld: shell.out, - inputs: "ShellSpec", - output_dir: Path, - stdout: str, - stderr: str, - ) -> ty.Any: - """Collect output file if metadata specified.""" - if not cls._required_fields_satisfied(fld, inputs): - return attrs.NOTHING - elif isinstance(fld, shell.outarg) and fld.path_template: - return template_update_single( - fld, inputs=inputs, output_dir=output_dir, spec_type="output" - ) - elif fld.callable: - callable_ = fld.callable - if isinstance(fld.callable, staticmethod): - # In case callable is defined as a static method, - # retrieve the function wrapped in the descriptor. 
- callable_ = fld.callable.__func__ - call_args = inspect.getfullargspec(callable_) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = fld - elif argnm == "output_dir": - call_args_val[argnm] = output_dir - elif argnm == "inputs": - call_args_val[argnm] = inputs - elif argnm == "stdout": - call_args_val[argnm] = stdout - elif argnm == "stderr": - call_args_val[argnm] = stderr - else: - try: - call_args_val[argnm] = getattr(inputs, argnm) - except AttributeError: - raise AttributeError( - f"arguments of the callable function from {fld.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - return callable_(**call_args_val) - else: - raise Exception( - f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld}.' - ) - @classmethod def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool: """checking if all fields from the requires and template are set in the input diff --git a/pydra/engine/task.py b/pydra/engine/task.py index d2acc4ef74..fe9816992d 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -42,24 +42,30 @@ from __future__ import annotations import attrs +import typing as ty import json from pathlib import Path +import inspect from fileformats.core import FileSet from .core import Task from pydra.utils.messenger import AuditFlag from .specs import ( PythonSpec, ShellSpec, + is_set, attrs_fields, ) from .helpers import ( attrs_values, - parse_copyfile, + list_fields, ) -from pydra.engine.helpers_file import is_local_file +from pydra.engine.helpers_file import is_local_file, template_update_single from pydra.utils.typing import TypeParser from .environments import Native +if ty.TYPE_CHECKING: + from pydra.design import shell + class PythonTask(Task): """Wrap a Python callable as a task element.""" @@ -190,7 +196,7 @@ def _prepare_bindings(self, root: str): for fld in attrs_fields(self.spec): if TypeParser.contains_type(FileSet, fld.type): fileset = getattr(self.spec, fld.name) - copy = parse_copyfile(fld)[0] == FileSet.CopyMode.copy + copy = fld.copy_mode == FileSet.CopyMode.copy host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") @@ -203,6 +209,73 @@ def _prepare_bindings(self, root: str): env_path / rel for rel in fileset.relative_fspaths ) + def resolve_value( + self, + fld: "shell.out", + stdout: str, + stderr: str, + ) -> ty.Any: + """Collect output file if metadata specified.""" + from pydra.design import shell + + if not self.spec.Outputs._required_fields_satisfied(fld, self.spec): + return None + elif isinstance(fld, shell.outarg) and fld.path_template: + return template_update_single( + fld, spec=self.spec, output_dir=self.output_dir, spec_type="output" + ) + elif fld.callable: + callable_ = fld.callable + if isinstance(fld.callable, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. 
+ callable_ = fld.callable.__func__ + call_args = inspect.getfullargspec(callable_) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = fld + elif argnm == "output_dir": + call_args_val[argnm] = self.output_dir + elif argnm == "inputs": + call_args_val[argnm] = self.inputs + elif argnm == "stdout": + call_args_val[argnm] = stdout + elif argnm == "stderr": + call_args_val[argnm] = stderr + else: + try: + call_args_val[argnm] = self.inputs[argnm] + except KeyError as e: + e.add_note( + f"arguments of the callable function from {fld.name} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm} is used" + ) + raise + return callable_(**call_args_val) + else: + raise Exception( + f"Metadata for '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld}.' + ) + + def generated_output_names(self, stdout: str, stderr: str): + """Returns a list of all outputs that will be generated by the task. + Takes into account the task input and the requires list for the output fields. + TODO: should be in all Output specs? + """ + # checking the input (if all mandatory fields are provided, etc.) + self.spec._check_rules() + output_names = ["return_code", "stdout", "stderr"] + for fld in list_fields(self): + # assuming that field should have either default or metadata, but not both + if is_set(fld.default): + output_names.append(fld.name) + elif is_set(self.resolve_value(fld, stdout, stderr)): + output_names.append(fld.name) + return output_names + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 64bd34a7e5..838e054bdd 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -413,7 +413,7 @@ def test_template_formatting(tmp_path): assert template_update_single( field, inputs, - inputs_dict_st=inputs_dict, + input_values=inputs_dict, output_dir=tmp_path, spec_type="input", ) == [str(tmp_path / "file.bvec"), str(tmp_path / "file.bval")] From 78f51ebabed273741c86d7a4118e93bd04ea8c0e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 10:48:21 +1100 Subject: [PATCH 090/342] moved docs back to original location, renamed new docs to "new-docs" --- .github/workflows/docs.yml | 48 ++++++++++++++---- {old-docs => docs}/.gitignore | 0 docs/Makefile | 42 +++++++-------- {old-docs => docs}/api.rst | 0 {old-docs => docs}/changes.rst | 0 {old-docs => docs}/combiner.rst | 0 {old-docs => docs}/components.rst | 0 {old-docs => docs}/conf.py | 0 docs/{source/_static => }/images/nd_spl_1.png | Bin docs/{source/_static => }/images/nd_spl_3.png | Bin .../_static => }/images/nd_spl_3_comb1.png | Bin .../_static => }/images/nd_spl_3_comb3.png | Bin docs/{source/_static => }/images/nd_spl_4.png | Bin {old-docs => docs}/index.rst | 0 {old-docs => docs}/input_spec.rst | 0 docs/{source/_static => }/logo/pydra_logo.jpg | Bin docs/{source/_static => }/logo/pydra_logo.png | Bin docs/{source/_static => }/logo/pydra_logo.svg | 0 {old-docs => docs}/output_spec.rst | 0 {old-docs => docs}/requirements.txt | 0 {old-docs => docs}/sphinxext/github_link.py | 0 {old-docs => docs}/state.rst | 0 {old-docs => docs}/user_guide.rst | 0 {old-docs => new-docs}/Makefile | 42 ++++++++------- {docs => new-docs}/make.bat | 0 .../source/_static}/images/nd_spl_1.png | Bin .../source/_static}/images/nd_spl_3.png | Bin .../source/_static}/images/nd_spl_3_comb1.png | Bin 
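For readers following the resolve_value and generated_output_names changes above, here is a minimal, hypothetical sketch of how an output callable might be declared so that its arguments are filled in by name. The class-decorator style mirrors the @shell.define examples that appear later in this patch series; the callable= keyword on shell.out is assumed from the fld.callable checks in the diff, and the task and field names are invented for illustration only.

from pydra.design import shell


def count_entries(stdout: str) -> int:
    # "stdout" is one of the argument names recognised by Task.resolve_value, so
    # the captured standard output of the command is injected automatically.
    # "field", "output_dir", "inputs" and "stderr" work the same way, and any
    # other argument name is looked up among the task inputs.
    return len(stdout.splitlines())


@shell.define
class ListDir:
    """Illustrative task only: list a directory and count its entries."""

    executable = "ls"

    class Outputs:
        n_entries: int = shell.out(callable=count_entries)  # keyword assumed

Output fields that instead carry a plain path or glob default are handled by the _resolve_default_value classmethod retained above, which resolves them relative to the task's output directory.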
.../source/_static}/images/nd_spl_3_comb3.png | Bin .../source/_static}/images/nd_spl_4.png | Bin .../source/_static}/logo/pydra_logo.jpg | Bin .../source/_static}/logo/pydra_logo.png | Bin .../source/_static}/logo/pydra_logo.svg | 0 {docs => new-docs}/source/conf.py | 0 .../source/explanation/conditional-lazy.rst | 0 .../source/explanation/hashing-caching.rst | 0 .../source/explanation/provenance.rst | 0 .../explanation/splitting-combining.rst | 0 .../source/explanation/typing.rst | 0 .../source/howto/create-task-package.ipynb | 0 {docs => new-docs}/source/howto/install.ipynb | 0 .../source/howto/port-from-nipype.ipynb | 0 {docs => new-docs}/source/index.rst | 0 {docs => new-docs}/source/reference/api.rst | 0 .../source/tutorial/execution.ipynb | 0 .../source/tutorial/shell.ipynb | 0 {docs => new-docs}/source/tutorial/task.ipynb | 0 .../source/tutorial/workflow.ipynb | 0 {docs => new-docs}/tst.py | 0 pyproject.toml | 1 + 50 files changed, 82 insertions(+), 51 deletions(-) rename {old-docs => docs}/.gitignore (100%) rename {old-docs => docs}/api.rst (100%) rename {old-docs => docs}/changes.rst (100%) rename {old-docs => docs}/combiner.rst (100%) rename {old-docs => docs}/components.rst (100%) rename {old-docs => docs}/conf.py (100%) rename docs/{source/_static => }/images/nd_spl_1.png (100%) rename docs/{source/_static => }/images/nd_spl_3.png (100%) rename docs/{source/_static => }/images/nd_spl_3_comb1.png (100%) rename docs/{source/_static => }/images/nd_spl_3_comb3.png (100%) rename docs/{source/_static => }/images/nd_spl_4.png (100%) rename {old-docs => docs}/index.rst (100%) rename {old-docs => docs}/input_spec.rst (100%) rename docs/{source/_static => }/logo/pydra_logo.jpg (100%) rename docs/{source/_static => }/logo/pydra_logo.png (100%) rename docs/{source/_static => }/logo/pydra_logo.svg (100%) rename {old-docs => docs}/output_spec.rst (100%) rename {old-docs => docs}/requirements.txt (100%) rename {old-docs => docs}/sphinxext/github_link.py (100%) rename {old-docs => docs}/state.rst (100%) rename {old-docs => docs}/user_guide.rst (100%) rename {old-docs => new-docs}/Makefile (86%) rename {docs => new-docs}/make.bat (100%) rename {old-docs => new-docs/source/_static}/images/nd_spl_1.png (100%) rename {old-docs => new-docs/source/_static}/images/nd_spl_3.png (100%) rename {old-docs => new-docs/source/_static}/images/nd_spl_3_comb1.png (100%) rename {old-docs => new-docs/source/_static}/images/nd_spl_3_comb3.png (100%) rename {old-docs => new-docs/source/_static}/images/nd_spl_4.png (100%) rename {old-docs => new-docs/source/_static}/logo/pydra_logo.jpg (100%) rename {old-docs => new-docs/source/_static}/logo/pydra_logo.png (100%) rename {old-docs => new-docs/source/_static}/logo/pydra_logo.svg (100%) rename {docs => new-docs}/source/conf.py (100%) rename {docs => new-docs}/source/explanation/conditional-lazy.rst (100%) rename {docs => new-docs}/source/explanation/hashing-caching.rst (100%) rename {docs => new-docs}/source/explanation/provenance.rst (100%) rename {docs => new-docs}/source/explanation/splitting-combining.rst (100%) rename {docs => new-docs}/source/explanation/typing.rst (100%) rename {docs => new-docs}/source/howto/create-task-package.ipynb (100%) rename {docs => new-docs}/source/howto/install.ipynb (100%) rename {docs => new-docs}/source/howto/port-from-nipype.ipynb (100%) rename {docs => new-docs}/source/index.rst (100%) rename {docs => new-docs}/source/reference/api.rst (100%) rename {docs => new-docs}/source/tutorial/execution.ipynb (100%) rename {docs => 
new-docs}/source/tutorial/shell.ipynb (100%) rename {docs => new-docs}/source/tutorial/task.ipynb (100%) rename {docs => new-docs}/source/tutorial/workflow.ipynb (100%) rename {docs => new-docs}/tst.py (100%) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a1ce2ebaff..2a8d1dc610 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,12 +4,12 @@ name: Build docs on: + release: + types: [published] push: branches: - master pull_request: - # release: - # types: [published] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -17,6 +17,30 @@ concurrency: jobs: build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Install package + run: pip install .[doc] + - name: Build docs + run: | + cd docs + make html + cd .. + - uses: actions/upload-artifact@v3 + with: + name: docs + path: docs/build/html + + build-new: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -32,26 +56,32 @@ jobs: run: | python -m pip install --upgrade pip pip install build twine - - - name: Build docs + - name: Install package + run: pip install .[doc] + - name: Build new docs run: | - cd docs + cd new-docs make html - twine upload dist/* + cd .. - uses: actions/upload-artifact@v3 with: - name: built-docs - path: docs/build/html + name: new-docs + path: new-docs/build/html deploy: needs: [build] runs-on: ubuntu-latest steps: - - name: Download built docs + - name: Download docs uses: actions/download-artifact@v3 with: name: built-docs path: docs-build + - name: Download new docs + uses: actions/download-artifact@v3 + with: + name: new-docs + path: docs-build/new - name: Check for GHPAGES_DEPLOY_KEY token id: deployable if: github.event_name == 'release' diff --git a/old-docs/.gitignore b/docs/.gitignore similarity index 100% rename from old-docs/.gitignore rename to docs/.gitignore diff --git a/docs/Makefile b/docs/Makefile index e6d46dcbcc..b167d81714 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -5,7 +5,9 @@ SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = -BUILDDIR = build +BUILDDIR = _build +CURBRANCH = master +PYTHONPATH = $(PWD) # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) @@ -15,11 +17,11 @@ endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @@ -30,7 +32,6 @@ help: @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" - @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @@ -46,13 +47,16 @@ help: @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" + clean: rm -rf $(BUILDDIR)/* + rm -rf reference/* + rm -rf docs/api html: - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + mkdir -p _static _templates + PYTHONPATH=$(PYTHONPATH) $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." @@ -87,25 +91,17 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pype9.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pydra.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pype9.qhc" - -applehelp: - $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp - @echo - @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." - @echo "N.B. You won't be able to view it unless you put it in" \ - "~/Library/Documentation/Help or install it in your application" \ - "bundle." + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pydra.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/Pype9" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pype9" + @echo "# mkdir -p $$HOME/.local/share/devhelp/pydra" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pydra" @echo "# devhelp" epub: @@ -176,11 +172,6 @@ doctest: @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @@ -190,3 +181,6 @@ pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
+ +versioned: + PYTHONPATH=$(PYTHONPATH) sphinx-versioning -vv -l ./docs/conf.py build -r $(CURBRANCH) ./docs/ docs/$(BUILDDIR)/html/ diff --git a/old-docs/api.rst b/docs/api.rst similarity index 100% rename from old-docs/api.rst rename to docs/api.rst diff --git a/old-docs/changes.rst b/docs/changes.rst similarity index 100% rename from old-docs/changes.rst rename to docs/changes.rst diff --git a/old-docs/combiner.rst b/docs/combiner.rst similarity index 100% rename from old-docs/combiner.rst rename to docs/combiner.rst diff --git a/old-docs/components.rst b/docs/components.rst similarity index 100% rename from old-docs/components.rst rename to docs/components.rst diff --git a/old-docs/conf.py b/docs/conf.py similarity index 100% rename from old-docs/conf.py rename to docs/conf.py diff --git a/docs/source/_static/images/nd_spl_1.png b/docs/images/nd_spl_1.png similarity index 100% rename from docs/source/_static/images/nd_spl_1.png rename to docs/images/nd_spl_1.png diff --git a/docs/source/_static/images/nd_spl_3.png b/docs/images/nd_spl_3.png similarity index 100% rename from docs/source/_static/images/nd_spl_3.png rename to docs/images/nd_spl_3.png diff --git a/docs/source/_static/images/nd_spl_3_comb1.png b/docs/images/nd_spl_3_comb1.png similarity index 100% rename from docs/source/_static/images/nd_spl_3_comb1.png rename to docs/images/nd_spl_3_comb1.png diff --git a/docs/source/_static/images/nd_spl_3_comb3.png b/docs/images/nd_spl_3_comb3.png similarity index 100% rename from docs/source/_static/images/nd_spl_3_comb3.png rename to docs/images/nd_spl_3_comb3.png diff --git a/docs/source/_static/images/nd_spl_4.png b/docs/images/nd_spl_4.png similarity index 100% rename from docs/source/_static/images/nd_spl_4.png rename to docs/images/nd_spl_4.png diff --git a/old-docs/index.rst b/docs/index.rst similarity index 100% rename from old-docs/index.rst rename to docs/index.rst diff --git a/old-docs/input_spec.rst b/docs/input_spec.rst similarity index 100% rename from old-docs/input_spec.rst rename to docs/input_spec.rst diff --git a/docs/source/_static/logo/pydra_logo.jpg b/docs/logo/pydra_logo.jpg similarity index 100% rename from docs/source/_static/logo/pydra_logo.jpg rename to docs/logo/pydra_logo.jpg diff --git a/docs/source/_static/logo/pydra_logo.png b/docs/logo/pydra_logo.png similarity index 100% rename from docs/source/_static/logo/pydra_logo.png rename to docs/logo/pydra_logo.png diff --git a/docs/source/_static/logo/pydra_logo.svg b/docs/logo/pydra_logo.svg similarity index 100% rename from docs/source/_static/logo/pydra_logo.svg rename to docs/logo/pydra_logo.svg diff --git a/old-docs/output_spec.rst b/docs/output_spec.rst similarity index 100% rename from old-docs/output_spec.rst rename to docs/output_spec.rst diff --git a/old-docs/requirements.txt b/docs/requirements.txt similarity index 100% rename from old-docs/requirements.txt rename to docs/requirements.txt diff --git a/old-docs/sphinxext/github_link.py b/docs/sphinxext/github_link.py similarity index 100% rename from old-docs/sphinxext/github_link.py rename to docs/sphinxext/github_link.py diff --git a/old-docs/state.rst b/docs/state.rst similarity index 100% rename from old-docs/state.rst rename to docs/state.rst diff --git a/old-docs/user_guide.rst b/docs/user_guide.rst similarity index 100% rename from old-docs/user_guide.rst rename to docs/user_guide.rst diff --git a/old-docs/Makefile b/new-docs/Makefile similarity index 86% rename from old-docs/Makefile rename to new-docs/Makefile index b167d81714..e6d46dcbcc 
100644 --- a/old-docs/Makefile +++ b/new-docs/Makefile @@ -5,9 +5,7 @@ SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = -BUILDDIR = _build -CURBRANCH = master -PYTHONPATH = $(PWD) +BUILDDIR = build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) @@ -17,11 +15,11 @@ endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext help: @echo "Please use \`make ' where is one of" @@ -32,6 +30,7 @@ help: @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @@ -47,16 +46,13 @@ help: @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" - + @echo " coverage to run coverage check of the documentation (if enabled)" clean: rm -rf $(BUILDDIR)/* - rm -rf reference/* - rm -rf docs/api html: - mkdir -p _static _templates - PYTHONPATH=$(PYTHONPATH) $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." @@ -91,17 +87,25 @@ qthelp: @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pydra.qhcp" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Pype9.qhcp" @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pydra.qhc" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Pype9.qhc" + +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/pydra" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pydra" + @echo "# mkdir -p $$HOME/.local/share/devhelp/Pype9" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Pype9" @echo "# devhelp" epub: @@ -172,6 +176,11 @@ doctest: @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." 
+coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @@ -181,6 +190,3 @@ pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." - -versioned: - PYTHONPATH=$(PYTHONPATH) sphinx-versioning -vv -l ./docs/conf.py build -r $(CURBRANCH) ./docs/ docs/$(BUILDDIR)/html/ diff --git a/docs/make.bat b/new-docs/make.bat similarity index 100% rename from docs/make.bat rename to new-docs/make.bat diff --git a/old-docs/images/nd_spl_1.png b/new-docs/source/_static/images/nd_spl_1.png similarity index 100% rename from old-docs/images/nd_spl_1.png rename to new-docs/source/_static/images/nd_spl_1.png diff --git a/old-docs/images/nd_spl_3.png b/new-docs/source/_static/images/nd_spl_3.png similarity index 100% rename from old-docs/images/nd_spl_3.png rename to new-docs/source/_static/images/nd_spl_3.png diff --git a/old-docs/images/nd_spl_3_comb1.png b/new-docs/source/_static/images/nd_spl_3_comb1.png similarity index 100% rename from old-docs/images/nd_spl_3_comb1.png rename to new-docs/source/_static/images/nd_spl_3_comb1.png diff --git a/old-docs/images/nd_spl_3_comb3.png b/new-docs/source/_static/images/nd_spl_3_comb3.png similarity index 100% rename from old-docs/images/nd_spl_3_comb3.png rename to new-docs/source/_static/images/nd_spl_3_comb3.png diff --git a/old-docs/images/nd_spl_4.png b/new-docs/source/_static/images/nd_spl_4.png similarity index 100% rename from old-docs/images/nd_spl_4.png rename to new-docs/source/_static/images/nd_spl_4.png diff --git a/old-docs/logo/pydra_logo.jpg b/new-docs/source/_static/logo/pydra_logo.jpg similarity index 100% rename from old-docs/logo/pydra_logo.jpg rename to new-docs/source/_static/logo/pydra_logo.jpg diff --git a/old-docs/logo/pydra_logo.png b/new-docs/source/_static/logo/pydra_logo.png similarity index 100% rename from old-docs/logo/pydra_logo.png rename to new-docs/source/_static/logo/pydra_logo.png diff --git a/old-docs/logo/pydra_logo.svg b/new-docs/source/_static/logo/pydra_logo.svg similarity index 100% rename from old-docs/logo/pydra_logo.svg rename to new-docs/source/_static/logo/pydra_logo.svg diff --git a/docs/source/conf.py b/new-docs/source/conf.py similarity index 100% rename from docs/source/conf.py rename to new-docs/source/conf.py diff --git a/docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst similarity index 100% rename from docs/source/explanation/conditional-lazy.rst rename to new-docs/source/explanation/conditional-lazy.rst diff --git a/docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst similarity index 100% rename from docs/source/explanation/hashing-caching.rst rename to new-docs/source/explanation/hashing-caching.rst diff --git a/docs/source/explanation/provenance.rst b/new-docs/source/explanation/provenance.rst similarity index 100% rename from docs/source/explanation/provenance.rst rename to new-docs/source/explanation/provenance.rst diff --git a/docs/source/explanation/splitting-combining.rst b/new-docs/source/explanation/splitting-combining.rst similarity index 100% rename from docs/source/explanation/splitting-combining.rst rename to new-docs/source/explanation/splitting-combining.rst diff --git a/docs/source/explanation/typing.rst 
b/new-docs/source/explanation/typing.rst similarity index 100% rename from docs/source/explanation/typing.rst rename to new-docs/source/explanation/typing.rst diff --git a/docs/source/howto/create-task-package.ipynb b/new-docs/source/howto/create-task-package.ipynb similarity index 100% rename from docs/source/howto/create-task-package.ipynb rename to new-docs/source/howto/create-task-package.ipynb diff --git a/docs/source/howto/install.ipynb b/new-docs/source/howto/install.ipynb similarity index 100% rename from docs/source/howto/install.ipynb rename to new-docs/source/howto/install.ipynb diff --git a/docs/source/howto/port-from-nipype.ipynb b/new-docs/source/howto/port-from-nipype.ipynb similarity index 100% rename from docs/source/howto/port-from-nipype.ipynb rename to new-docs/source/howto/port-from-nipype.ipynb diff --git a/docs/source/index.rst b/new-docs/source/index.rst similarity index 100% rename from docs/source/index.rst rename to new-docs/source/index.rst diff --git a/docs/source/reference/api.rst b/new-docs/source/reference/api.rst similarity index 100% rename from docs/source/reference/api.rst rename to new-docs/source/reference/api.rst diff --git a/docs/source/tutorial/execution.ipynb b/new-docs/source/tutorial/execution.ipynb similarity index 100% rename from docs/source/tutorial/execution.ipynb rename to new-docs/source/tutorial/execution.ipynb diff --git a/docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb similarity index 100% rename from docs/source/tutorial/shell.ipynb rename to new-docs/source/tutorial/shell.ipynb diff --git a/docs/source/tutorial/task.ipynb b/new-docs/source/tutorial/task.ipynb similarity index 100% rename from docs/source/tutorial/task.ipynb rename to new-docs/source/tutorial/task.ipynb diff --git a/docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb similarity index 100% rename from docs/source/tutorial/workflow.ipynb rename to new-docs/source/tutorial/workflow.ipynb diff --git a/docs/tst.py b/new-docs/tst.py similarity index 100% rename from docs/tst.py rename to new-docs/tst.py diff --git a/pyproject.toml b/pyproject.toml index eb162c269d..b0f9796355 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ doc = [ "packaging", "sphinx ==6.2.1", "nbsphinx", + "pandoc", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", "furo>=2022.2.14.1", From 363ac5ded2204181449153153b73447cead444f6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 10:55:41 +1100 Subject: [PATCH 091/342] fixed import of version --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 6ab4140a4f..22034515bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,7 +16,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) sys.path.insert(1, str(Path(__file__).parent / "sphinxext")) -from pydra import __version__ # noqa: E402 +from pydra.engine import __version__ # noqa: E402 from github_link import make_linkcode_resolve # noqa: E402 From bc030f5ae3afa218ee01dc54f3be037af48458f4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 10:57:09 +1100 Subject: [PATCH 092/342] added rtd theme to docs deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b0f9796355..27e8e6e060 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ doc = [ "sphinx ==6.2.1", "nbsphinx", "pandoc", + "sphinx_rtd_theme", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", 
"furo>=2022.2.14.1", From a3724bdd68301bc05b435e25b7d17f15d0108407 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 10:58:56 +1100 Subject: [PATCH 093/342] fixing up new docs build action --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2a8d1dc610..5f3b28a890 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -51,7 +51,7 @@ jobs: - name: Install Pandoc run: | sudo apt-get update - sudo apt-get install -Y pandoc + sudo apt-get install pandoc - name: Install dependencies run: | python -m pip install --upgrade pip From 9383ccd1440d725c7ded53e5be12567cda0c98fb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:01:27 +1100 Subject: [PATCH 094/342] added docs dep --- .github/workflows/docs.yml | 2 +- pyproject.toml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5f3b28a890..d9a6dd0730 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -69,7 +69,7 @@ jobs: path: new-docs/build/html deploy: - needs: [build] + needs: [build, build-new] runs-on: ubuntu-latest steps: - name: Download docs diff --git a/pyproject.toml b/pyproject.toml index 27e8e6e060..8f2407de4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dev = ["black", "pre-commit", "pydra[test]"] doc = [ "packaging", "sphinx ==6.2.1", + "sphinxarg", "nbsphinx", "pandoc", "sphinx_rtd_theme", From ee6844a6709f3b862f3dd144869893c80c07a201 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:06:15 +1100 Subject: [PATCH 095/342] revert main docs to latest tag --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d9a6dd0730..e0520d8176 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,6 +20,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Revert version to most recent version tag + run: git checkout $(git tag -l | grep 'v.*' | tail -n 1 | awk -F post '{print $1}') - name: Set up Python uses: actions/setup-python@v5 with: From 137965b43996433832611bb0b8d9f95deb3833c3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:09:56 +1100 Subject: [PATCH 096/342] reworking docs CI --- .github/workflows/docs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e0520d8176..f7c2a8c478 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,8 +20,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Revert version to most recent version tag - run: git checkout $(git tag -l | grep 'v.*' | tail -n 1 | awk -F post '{print $1}') - name: Set up Python uses: actions/setup-python@v5 with: @@ -32,6 +30,8 @@ jobs: pip install build twine - name: Install package run: pip install .[doc] + - name: Revert version to most recent version tag + run: git checkout $(git tag -l | grep 'v.*' | tail -n 1 | awk -F post '{print $1}') - name: Build docs run: | cd docs From 2574d8e5db725fa5fe9c94dc1013bc0202ea809e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:12:47 +1100 Subject: [PATCH 097/342] touching up docs deps --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f2407de4e..50420ff8c2 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dev = ["black", "pre-commit", "pydra[test]"] doc = [ "packaging", "sphinx ==6.2.1", - "sphinxarg", + "sphinx-argparse", "nbsphinx", "pandoc", "sphinx_rtd_theme", From be5d8b5afa26971fc22d998e2664d8ac81b4ace5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:14:41 +1100 Subject: [PATCH 098/342] more docs deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 50420ff8c2..1eca0e3790 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ doc = [ "nbsphinx", "pandoc", "sphinx_rtd_theme", + "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", "furo>=2022.2.14.1", From 6491756e0db541cfbd7a2889e567a86c95d12512 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:16:36 +1100 Subject: [PATCH 099/342] fixed docs ci --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f7c2a8c478..e31ab87c84 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -77,7 +77,7 @@ jobs: - name: Download docs uses: actions/download-artifact@v3 with: - name: built-docs + name: docs path: docs-build - name: Download new docs uses: actions/download-artifact@v3 From 571ba9c1cbc858448bff7d5c9f3e4a00e41c69c2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 11:18:19 +1100 Subject: [PATCH 100/342] docs ci touch up --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e31ab87c84..b021d625af 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -40,7 +40,7 @@ jobs: - uses: actions/upload-artifact@v3 with: name: docs - path: docs/build/html + path: docs/_build/html build-new: runs-on: ubuntu-latest From c4d7468262e5878c1990059aa55c737014fce44b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 12:09:23 +1100 Subject: [PATCH 101/342] removed restriction on release for docs deploy --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b021d625af..6301c4a3ab 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -86,7 +86,7 @@ jobs: path: docs-build/new - name: Check for GHPAGES_DEPLOY_KEY token id: deployable - if: github.event_name == 'release' + # if: github.event_name == 'release' env: GHPAGES_DEPLOY_KEY: "${{ secrets.GHPAGES_DEPLOY_KEY }}" run: if [ -n "$GHPAGES_DEPLOY_KEY" ]; then echo "DEPLOY=true" >> $GITHUB_OUTPUT; fi From 4a8c42dcb51be0f336ac2bf4431b1c3e28aa9b97 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 12:26:51 +1100 Subject: [PATCH 102/342] include ipython in docs deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1eca0e3790..4b2b4af53d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,7 @@ doc = [ "sphinx ==6.2.1", "sphinx-argparse", "nbsphinx", + "ipython", "pandoc", "sphinx_rtd_theme", "sphinx-click", From 1a6b067ceaab627b42582683b012b81c9b53bec7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 27 Dec 2024 14:51:32 +1100 Subject: [PATCH 103/342] renamed Outputs to TaskOutputs --- pydra/design/base.py | 12 ++++++------ pydra/design/workflow.py | 4 ++-- pydra/engine/specs.py | 10 +++++----- pydra/engine/workflow/base.py | 4 ++-- 
pydra/engine/workflow/node.py | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index e27a1bb0ba..f993e11842 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -26,7 +26,7 @@ if ty.TYPE_CHECKING: - from pydra.engine.specs import TaskSpec, Outputs + from pydra.engine.specs import TaskSpec, TaskOutputs from pydra.engine.core import Task __all__ = [ @@ -351,7 +351,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_spec( spec_type: type["TaskSpec"], - out_type: type["Outputs"], + out_type: type["TaskOutputs"], task_type: type["Task"], inputs: dict[str, Arg], outputs: dict[str, Out], @@ -467,11 +467,11 @@ def make_task_spec( def make_outputs_spec( - spec_type: type["Outputs"], + spec_type: type["TaskOutputs"], outputs: dict[str, Out], bases: ty.Sequence[type], spec_name: str, -) -> type["Outputs"]: +) -> type["TaskOutputs"]: """Create an outputs specification class and its outputs specification class from the output fields provided to the decorator/function. @@ -492,10 +492,10 @@ def make_outputs_spec( klass : type The class created using the attrs package """ - from pydra.engine.specs import Outputs + from pydra.engine.specs import TaskOutputs if not any(issubclass(b, spec_type) for b in bases): - if out_spec_bases := [b for b in bases if issubclass(b, Outputs)]: + if out_spec_bases := [b for b in bases if issubclass(b, TaskOutputs)]: raise ValueError( f"Cannot make {spec_type} output spec from {out_spec_bases} bases" ) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 8dfc193c93..7562d78df0 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -15,7 +15,7 @@ if ty.TYPE_CHECKING: from pydra.engine.workflow.base import Workflow - from pydra.engine.specs import TaskSpec, Outputs, WorkflowSpec + from pydra.engine.specs import TaskSpec, TaskOutputs, WorkflowSpec __all__ = ["define", "add", "this", "arg", "out"] @@ -205,7 +205,7 @@ def this() -> "Workflow": return Workflow.under_construction -OutputsType = ty.TypeVar("OutputsType", bound="Outputs") +OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") def add(task_spec: "TaskSpec[OutputsType]", name: str = None) -> OutputsType: diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index c823f1b174..8dcb196a35 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -40,7 +40,7 @@ def is_set(value: ty.Any) -> bool: return value not in (attrs.NOTHING, EMPTY) -class Outputs: +class TaskOutputs: """Base class for all output specifications""" RESERVED_FIELD_NAMES = ("inputs", "split", "combine") @@ -167,7 +167,7 @@ def __getitem__(self, name: str) -> ty.Any: raise KeyError(f"{self} doesn't have an attribute {name}") from None -OutputsType = ty.TypeVar("OutputType", bound=Outputs) +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) class TaskSpec(ty.Generic[OutputsType]): @@ -453,7 +453,7 @@ class RuntimeSpec: network: bool = False -class PythonOutputs(Outputs): +class PythonOutputs(TaskOutputs): pass @@ -464,7 +464,7 @@ class PythonSpec(TaskSpec[PythonOutputsType]): pass -class WorkflowOutputs(Outputs): +class WorkflowOutputs(TaskOutputs): pass @@ -480,7 +480,7 @@ class WorkflowSpec(TaskSpec[WorkflowOutputsType]): STDERR_HELP = """The standard error stream produced by the command.""" -class ShellOutputs(Outputs): +class ShellOutputs(TaskOutputs): """Output specification of a generic shell process.""" return_code: int = shell.out(help_string=RETURN_CODE_HELP) diff 
--git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index 56c3447e6c..cf649ebd4a 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -4,14 +4,14 @@ from typing_extensions import Self import attrs from pydra.engine.helpers import list_fields, attrs_values, is_lazy -from pydra.engine.specs import TaskSpec, Outputs, WorkflowOutputs +from pydra.engine.specs import TaskSpec, TaskOutputs, WorkflowOutputs from .lazy import LazyInField from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from .node import Node -OutputsType = ty.TypeVar("OutputType", bound=Outputs) +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 189fc0cebc..2a3daef980 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -5,7 +5,7 @@ import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy -from ..specs import TaskSpec, Outputs, WorkflowSpec +from ..specs import TaskSpec, TaskOutputs, WorkflowSpec from ..task import Task from ..helpers import ensure_list, attrs_values, is_lazy, load_result, create_checksum from pydra.utils.hash import hash_function @@ -16,7 +16,7 @@ from .base import Workflow -OutputType = ty.TypeVar("OutputType", bound=Outputs) +OutputType = ty.TypeVar("OutputType", bound=TaskOutputs) Splitter = ty.Union[str, ty.Tuple[str, ...]] _not_set = Enum("_not_set", "NOT_SET") From a8ed05e4f603b322d0d669ca15a68103cd920f02 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 28 Dec 2024 18:33:03 +1100 Subject: [PATCH 104/342] finished workflow tutorial and reworked some of the docs structure --- new-docs/source/_static/css/custom.css | 4 + new-docs/source/conf.py | 3 + .../source/explanation/conditional-lazy.rst | 4 +- .../source/explanation/design-approach.rst | 75 ++++ .../source/explanation/hashing-caching.rst | 4 +- new-docs/source/howto/install.ipynb | 93 ----- new-docs/source/howto/real-example.ipynb | 46 +++ new-docs/source/index.rst | 89 +--- new-docs/source/tutorial/execution.ipynb | 2 +- .../tutorial/{task.ipynb => python.ipynb} | 2 +- new-docs/source/tutorial/shell.ipynb | 16 +- new-docs/source/tutorial/tst.py | 27 ++ new-docs/source/tutorial/workflow.ipynb | 380 +++++++++++++++++- pydra/design/base.py | 2 +- 14 files changed, 567 insertions(+), 180 deletions(-) create mode 100644 new-docs/source/_static/css/custom.css create mode 100644 new-docs/source/explanation/design-approach.rst delete mode 100644 new-docs/source/howto/install.ipynb create mode 100644 new-docs/source/howto/real-example.ipynb rename new-docs/source/tutorial/{task.ipynb => python.ipynb} (99%) create mode 100644 new-docs/source/tutorial/tst.py diff --git a/new-docs/source/_static/css/custom.css b/new-docs/source/_static/css/custom.css new file mode 100644 index 0000000000..161e475987 --- /dev/null +++ b/new-docs/source/_static/css/custom.css @@ -0,0 +1,4 @@ +div.nbinput .prompt, +div.nboutput .prompt { + display: none; +} diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py index 552caab751..d5235029a1 100644 --- a/new-docs/source/conf.py +++ b/new-docs/source/conf.py @@ -144,6 +144,9 @@ }, } +html_static_path = ["_static"] +html_css_files = ["css/custom.css"] + # Add any paths that contain custom themes here, relative to this directory. 
# html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst index 0c30be1d1c..3ed2c600ed 100644 --- a/new-docs/source/explanation/conditional-lazy.rst +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -1,4 +1,4 @@ -Conditionals and lazy fields -============================ +Conditional construction +======================== Work in progress... diff --git a/new-docs/source/explanation/design-approach.rst b/new-docs/source/explanation/design-approach.rst new file mode 100644 index 0000000000..07c94226f1 --- /dev/null +++ b/new-docs/source/explanation/design-approach.rst @@ -0,0 +1,75 @@ + +Design philosophy +================= + +Rationale +--------- + +Scientific workflows often require sophisticated analyses that encompass a large collection +of algorithms. +The algorithms, that were originally not necessarily designed to work together, +and were written by different authors. +Some may be written in Python, while others might require calling external programs. +It is a common practice to create semi-manual workflows that require the scientists +to handle the files and interact with partial results from algorithms and external tools. +This approach is conceptually simple and easy to implement, but the resulting workflow +is often time consuming, error-prone and difficult to share with others. +Consistency, reproducibility and scalability demand scientific workflows +to be organized into fully automated pipelines. +This was the motivation behind Pydra - a new dataflow engine written in Python. + +History +------- + +The Pydra package is a part of the second generation of the Nipype_ ecosystem +--- an open-source framework that provides a uniform interface to existing neuroimaging +software and facilitates interaction between different software components. +The Nipype project was born in the neuroimaging community, and has been helping scientists +build workflows for a decade, providing a uniform interface to such neuroimaging packages +as FSL_, ANTs_, AFNI_, FreeSurfer_ and SPM_. +This flexibility has made it an ideal basis for popular preprocessing tools, +such as fMRIPrep_ and C-PAC_. +The second generation of Nipype ecosystem is meant to provide additional flexibility +and is being developed with reproducibility, ease of use, and scalability in mind. +Pydra itself is a standalone project and is designed as a general-purpose dataflow engine +to support any scientific domain. + +Goals +----- + +The goal of Pydra is to provide a lightweight dataflow engine for computational graph construction, +manipulation, and distributed execution, as well as ensuring reproducibility of scientific pipelines. +In Pydra, a dataflow is represented as a directed acyclic graph, where each node represents a Python +function, execution of an external tool, or another reusable dataflow. +The combination of several key features makes Pydra a customizable and powerful dataflow engine: + +- Composable dataflows: Any node of a dataflow graph can be another dataflow, allowing for nested + dataflows of arbitrary depths and encouraging creating reusable dataflows. + +- Flexible semantics for creating nested loops over input sets: Any Task or dataflow can be run + over input parameter sets and the outputs can be recombined (similar concept to Map-Reduce_ model, + but Pydra extends this to graphs with nested dataflows). 
+ +- A content-addressable global cache: Hash values are computed for each graph and each Task. + This supports reusing of previously computed and stored dataflows and Tasks. + +- Support for Python functions and external (shell) commands: Pydra can decorate and use existing + functions in Python libraries alongside external command line tools, allowing easy integration + of existing code and software. + +- Native container execution support: Any dataflow or Task can be executed in an associated container + (via Docker or Singularity) enabling greater consistency for reproducibility. + +- Auditing and provenance tracking: Pydra provides a simple JSON-LD-based message passing mechanism + to capture the dataflow execution activities as a provenance graph. These messages track inputs + and outputs of each task in a dataflow, and the resources consumed by the task. + +.. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL +.. _ANTs: http://stnava.github.io/ANTs/ +.. _AFNI: https://afni.nimh.nih.gov/ +.. _FreeSurfer: https://surfer.nmr.mgh.harvard.edu/ +.. _SPM: https://www.fil.ion.ucl.ac.uk/spm/ +.. _fMRIPrep: https://fmriprep.org/en/stable/ +.. _C-PAC: https://fcp-indi.github.io/docs/latest/index +.. _Map-Reduce: https://en.wikipedia.org/wiki/MapReduce diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst index 44e4189b73..ce59e448cf 100644 --- a/new-docs/source/explanation/hashing-caching.rst +++ b/new-docs/source/explanation/hashing-caching.rst @@ -1,4 +1,4 @@ -Hashing and caching -=================== +Caching +======= Work in progress.... diff --git a/new-docs/source/howto/install.ipynb b/new-docs/source/howto/install.ipynb deleted file mode 100644 index 6a0887c1c2..0000000000 --- a/new-docs/source/howto/install.ipynb +++ /dev/null @@ -1,93 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Installation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Pydra is a pure Python package, which only depends on two packages `attrs` and `fileformats`. 
Therefore, it is straightforward to install via pip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [], - "source": [ - "pip install pydra" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To install a development version use the `dev` install option" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "zsh:1: no matches found: pydra[dev]\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "git clone git@github.com:nipype/pydra.git\n", - "pip install -e ./pydra[dev]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "vscode": { - "languageId": "shellscript" - } - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/new-docs/source/howto/real-example.ipynb b/new-docs/source/howto/real-example.ipynb new file mode 100644 index 0000000000..b2aa082a3b --- /dev/null +++ b/new-docs/source/howto/real-example.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Real-world example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is an real-world example of a workflow to pre-process T1-weighted MRI images for further analysis\n", + "\n", + "Work in progress..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 6d84dfe15e..bd19a73143 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -3,86 +3,32 @@ Pydra ===== -Pydra is a new lightweight dataflow engine written in Python. +Pydra is a new lightweight dataflow engine written in Python, which provides a simple way to +implement scientific workflows that use a mix of shell commands and Python functions. + Pydra is developed as an open-source project in the neuroimaging community, but it is designed as a general-purpose dataflow engine to support any scientific domain. -Rationale ---------- - -Scientific workflows often require sophisticated analyses that encompass a large collection -of algorithms. -The algorithms, that were originally not necessarily designed to work together, -and were written by different authors. -Some may be written in Python, while others might require calling external programs. -It is a common practice to create semi-manual workflows that require the scientists -to handle the files and interact with partial results from algorithms and external tools. 
-This approach is conceptually simple and easy to implement, but the resulting workflow -is often time consuming, error-prone and difficult to share with others. -Consistency, reproducibility and scalability demand scientific workflows -to be organized into fully automated pipelines. -This was the motivation behind Pydra - a new dataflow engine written in Python. - -History -------- - -The Pydra package is a part of the second generation of the Nipype_ ecosystem ---- an open-source framework that provides a uniform interface to existing neuroimaging -software and facilitates interaction between different software components. -The Nipype project was born in the neuroimaging community, and has been helping scientists -build workflows for a decade, providing a uniform interface to such neuroimaging packages -as FSL_, ANTs_, AFNI_, FreeSurfer_ and SPM_. -This flexibility has made it an ideal basis for popular preprocessing tools, -such as fMRIPrep_ and C-PAC_. -The second generation of Nipype ecosystem is meant to provide additional flexibility -and is being developed with reproducibility, ease of use, and scalability in mind. -Pydra itself is a standalone project and is designed as a general-purpose dataflow engine -to support any scientific domain. - -Design goals ------------- - -The goal of Pydra is to provide a lightweight dataflow engine for computational graph construction, -manipulation, and distributed execution, as well as ensuring reproducibility of scientific pipelines. -In Pydra, a dataflow is represented as a directed acyclic graph, where each node represents a Python -function, execution of an external tool, or another reusable dataflow. -The combination of several key features makes Pydra a customizable and powerful dataflow engine: - -- Composable dataflows: Any node of a dataflow graph can be another dataflow, allowing for nested - dataflows of arbitrary depths and encouraging creating reusable dataflows. +See the :ref:`Design philosophy` for more an explanation of the design +philosophy and goals of Pydra. -- Flexible semantics for creating nested loops over input sets: Any Task or dataflow can be run - over input parameter sets and the outputs can be recombined (similar concept to Map-Reduce_ model, - but Pydra extends this to graphs with nested dataflows). - -- A content-addressable global cache: Hash values are computed for each graph and each Task. - This supports reusing of previously computed and stored dataflows and Tasks. - -- Support for Python functions and external (shell) commands: Pydra can decorate and use existing - functions in Python libraries alongside external command line tools, allowing easy integration - of existing code and software. +Installation +------------ -- Native container execution support: Any dataflow or Task can be executed in an associated container - (via Docker or Singularity) enabling greater consistency for reproducibility. +Pydra itself is a pure-Python package, which has only a handful of dependencies, +therefore, it is straightforward to install via pip -- Auditing and provenance tracking: Pydra provides a simple JSON-LD-based message passing mechanism - to capture the dataflow execution activities as a provenance graph. These messages track inputs - and outputs of each task in a dataflow, and the resources consumed by the task. +.. code-block:: bash -.. _Nipype: https://nipype.readthedocs.io/en/latest/ -.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL -.. _ANTs: http://stnava.github.io/ANTs/ -.. 
_AFNI: https://afni.nimh.nih.gov/ -.. _FreeSurfer: https://surfer.nmr.mgh.harvard.edu/ -.. _SPM: https://www.fil.ion.ucl.ac.uk/spm/ -.. _fMRIPrep: https://fmriprep.org/en/stable/ -.. _C-PAC: https://fcp-indi.github.io/docs/latest/index -.. _Map-Reduce: https://en.wikipedia.org/wiki/MapReduce + $ pip install pydra +Of course, if you use Pydra to execute shell-commands tools, you will need to either have +those commands installed on the execution machine, or use software containers +(e.g., Docker or Singularity) to run them. Indices and tables -================== +------------------ * :ref:`genindex` * :ref:`modindex` @@ -94,7 +40,7 @@ Indices and tables :hidden: tutorial/execution - tutorial/task + tutorial/python tutorial/shell tutorial/workflow @@ -103,7 +49,7 @@ Indices and tables :caption: How-to Guides :hidden: - howto/install + howto/real-example howto/create-task-package howto/port-from-nipype @@ -112,6 +58,7 @@ Indices and tables :caption: Explanation :hidden: + explanation/design-approach explanation/splitting-combining explanation/typing explanation/hashing-caching diff --git a/new-docs/source/tutorial/execution.ipynb b/new-docs/source/tutorial/execution.ipynb index 24935a0a52..7250f5ab3c 100644 --- a/new-docs/source/tutorial/execution.ipynb +++ b/new-docs/source/tutorial/execution.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Executing tasks\n", + "# Running tasks\n", "\n", "A *Task* is the basic runnable component in Pydra, and can execute either a Python function,\n", "shell command or workflows consisting of combinations of all three types." diff --git a/new-docs/source/tutorial/task.ipynb b/new-docs/source/tutorial/python.ipynb similarity index 99% rename from new-docs/source/tutorial/task.ipynb rename to new-docs/source/tutorial/python.ipynb index cae0ad88cb..1ea45100ac 100644 --- a/new-docs/source/tutorial/task.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Task design" + "# Python-task design" ] }, { diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index ecc6504156..bf8c852c71 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -398,10 +398,10 @@ " executable = \"cp\"\n", "\n", " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\", default=None)\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\", default=None)\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", "\n", " class Outputs:\n", " out_file: File\n", @@ -434,11 +434,11 @@ "\n", " executable = \"cp\"\n", "\n", - " in_fs_objects: MultiInputObj[FsObject] = shell.arg()\n", - " recursive: bool = shell.arg(default=False)\n", - " text_arg: str = shell.arg()\n", - " int_arg: int | None = shell.arg(default=None)\n", - " tuple_arg: tuple[int, str] | None = shell.arg(default=None)\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", "\n", " 
@shell.outputs\n", " class Outputs(ShellOutputs):\n", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py new file mode 100644 index 0000000000..43a263f67a --- /dev/null +++ b/new-docs/source/tutorial/tst.py @@ -0,0 +1,27 @@ +from pydra.design import workflow, python + + +# Example python task specifications +@python.define +def Add(a, b): + return a + b + + +@python.define +def Mul(a, b): + return a * b + + +@python.define +def Sum(x: list[float]) -> float: + return sum(x) + + +@workflow.define +def MySplitWorkflow(a: list[int], b: list[float]) -> list[float]: + # Multiply over all combinations of the elements of a and b, then combine the results + # for each a element into a list over each b element + mul = workflow.add(Mul()).split(x=a, y=b).combine("x") + # Sume the multiplications across all all b elements for each a element + sum = workflow.add(Sum(x=mul.out)) + return sum.out diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 5538297961..b836ec5c6d 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -7,6 +7,370 @@ "# Workflow design" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Given two task specifications, `Add` and `Mul`" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import workflow, python\n", + "\n", + "# Example python task specifications\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " we can create a simple workflow specification using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling `workflow.add` function." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def MyTestWorkflow(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`workflow.add` returns an \"outputs\" object corresponding to the specification added to the workflow. The fields of the outptus object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n", + "\n", + "It is also possible to define new tasks to add to the workflow inline the constructor and type the inputs and outputs of the workflow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "from fileformats import image, video\n", + "\n", + "@workflow.define\n", + "def MyTestShellWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=add_watermark.out_video, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessing the workflow object\n", + "\n", + "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"divided\"])\n", + "def Divide(x, y):\n", + " return x / y\n", + "\n", + "\n", + "@workflow.define(outputs=[\"out1\", \"out2\"])\n", + "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " return mul.out, divide.divided" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directly access the workflow being constructed also enables you to set the outputs of the workflow directly" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n", + "def SetOutputsOfWorkflow(a: int, b: float):\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " # Set the outputs of the workflow directly\n", + " wf.outputs.out1 = mul.out\n", + " wf.outputs.out2 = divide.divided" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataclass form\n", + "\n", + 
"Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. In this case the workflow constructor should be a static method of the dataclasss named `constructor`.\n", + "\n", + "This form also lends itself to defining custom converters and validators on the fields" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from pydra.engine.specs import WorkflowSpec, WorkflowOutputs\n", + "\n", + "def a_converter(value):\n", + " if value is None:\n", + " return value\n", + " return float(value)\n", + "\n", + "@workflow.define\n", + "class MyLibraryWorkflow(WorkflowSpec[\"MyLibraryWorkflow.Outputs\"]):\n", + "\n", + " a: int\n", + " b: float = workflow.arg(\n", + " help_string=\"A float input\",\n", + " converter=a_converter,\n", + " )\n", + "\n", + " @staticmethod\n", + " def constructor(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out\n", + "\n", + " @workflow.outputs\n", + " class Outputs(WorkflowOutputs):\n", + " out: float" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splitting/combining task inputs\n", + "\n", + "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. This can be achieved by using the `split` and `combine` methods" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Sum(x: list[float]) -> float:\n", + " return sum(x)\n", + "\n", + "@workflow.define\n", + "def MySplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", + " # Multiply over all combinations of the elements of a and b, then combine the results\n", + " # for each a element into a list over each b element\n", + " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", + " # Sume the multiplications across all all b elements for each a element\n", + " sum = workflow.add(Sum(x=mul.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The combination step doesn't have to be done on the same step as the split, in which case the splits propagate to downstream nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def MySplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", + " mul = workflow.add(Mul()).split(x=a, y=b)\n", + " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", + " sum = workflow.add(Sum(x=add.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nested and conditional workflows\n", + "\n", + "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + 
"@workflow.define\n", + "def MyConditionalWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] | None = None,\n", + ") -> video.Mp4:\n", + "\n", + " if watermark_dims is not None:\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " handbrake_input = add_watermark.out_video\n", + " else:\n", + " handbrake_input = input_video\n", + "\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=handbrake_input, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. However, you can get around\n", + "this limitation by placing the conditional logic within a nested workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Subtract(x: float, y: float) -> float:\n", + " return x - y\n", + "\n", + "@workflow.define\n", + "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", + " add = workflow.add(Add(x=a, y=1))\n", + " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", + " if depth > 0:\n", + " out_node = workflow.add(\n", + " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", + " )\n", + " else:\n", + " out_node = add\n", + " return out_node.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", + "placeholders see [Conditional construction](../explanation/conditional-lazy.html)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -14,8 +378,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/pydra/design/base.py b/pydra/design/base.py index f993e11842..dec1be2fc0 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -618,7 +618,7 @@ def ensure_field_objects( out.name = output_name if not out.help_string: out.help_string = output_helps.get(output_name, "") - elif inspect.isclass(out): + elif inspect.isclass(out) or ty.get_origin(out): outputs[output_name] = out_type( type=out, name=output_name, From 8e88ed28473db169d8f16fce98c4ad3b31d7c231 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 28 Dec 2024 22:47:51 +1100 Subject: [PATCH 105/342] finished off the workflow tutorial for now --- new-docs/source/tutorial/workflow.ipynb | 94 ++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 8 deletions(-) diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index b836ec5c6d..37d116d631 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -4,14 
+4,22 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Workflow design" + "# Workflow design\n", + "\n", + "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", + "Workflow specifications are dataclasses, which interchangeable with Python and shell tasks\n", + "specifications and executed in the same way." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Given two task specifications, `Add` and `Mul`" + "## Constructor functions\n", + "\n", + "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", + "a \"constructor\" function that generates the workflow. For example, given two task\n", + "specifications, `Add` and `Mul`." ] }, { @@ -47,7 +55,7 @@ "outputs": [], "source": [ "@workflow.define\n", - "def MyTestWorkflow(a, b):\n", + "def BasicWorkflow(a, b):\n", " add = workflow.add(Add(a=a, b=b))\n", " mul = workflow.add(Mul(a=add.out, b=b))\n", " return mul.out" @@ -72,7 +80,7 @@ "from fileformats import image, video\n", "\n", "@workflow.define\n", - "def MyTestShellWorkflow(\n", + "def ShellWorkflow(\n", " input_video: video.Mp4,\n", " watermark: image.Png,\n", " watermark_dims: tuple[int, int] = (10, 10),\n", @@ -209,7 +217,7 @@ " return float(value)\n", "\n", "@workflow.define\n", - "class MyLibraryWorkflow(WorkflowSpec[\"MyLibraryWorkflow.Outputs\"]):\n", + "class LibraryWorkflow(WorkflowSpec[\"MyLibraryWorkflow.Outputs\"]):\n", "\n", " a: int\n", " b: float = workflow.arg(\n", @@ -248,7 +256,7 @@ " return sum(x)\n", "\n", "@workflow.define\n", - "def MySplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", + "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", " # Multiply over all combinations of the elements of a and b, then combine the results\n", " # for each a element into a list over each b element\n", " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", @@ -271,7 +279,7 @@ "outputs": [], "source": [ "@workflow.define\n", - "def MySplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", + "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", " mul = workflow.add(Mul()).split(x=a, y=b)\n", " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", " sum = workflow.add(Sum(x=add.out))\n", @@ -301,7 +309,7 @@ "outputs": [], "source": [ "@workflow.define\n", - "def MyConditionalWorkflow(\n", + "def ConditionalWorkflow(\n", " input_video: video.Mp4,\n", " watermark: image.Png,\n", " watermark_dims: tuple[int, int] | None = None,\n", @@ -371,6 +379,76 @@ "placeholders see [Conditional construction](../explanation/conditional-lazy.html)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Typing\n", + "\n", + "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", + "when values or upstream outputs are assigned to task specification inputs.\n", + "\n", + "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", + "However, if they are assigned a type and a value or output from an upstream node conflicts\n", + "with the type, a `TypeError` will be raised at construction time.\n", + "\n", + "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", + "by `Any` or a super-class of the field being assigned to. 
For example, an input of\n", + "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type,\n", + "because `Png` is a subtype of `File`, where as `fileformats.image.Jpeg` input would fail\n", + "since it is clearly not the intended type.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats import generic\n", + "\n", + "Mp4Handbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "\n", + "QuicktimeHandbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "@workflow.define\n", + "def TypeErrorWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: generic.File,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " ),\n", + " name=\"add_watermark\",\n", + " )\n", + "\n", + " try:\n", + " handbrake = workflow.add(\n", + " QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # This will raise a TypeError because the input video is an Mp4\n", + " except TypeError:\n", + " handbrake = workflow.add(\n", + " Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # The type of the input video is now correct\n", + "\n", + " return handbrake.output_video" + ] + }, { "cell_type": "markdown", "metadata": {}, From 6f1c8e38c6635aece57632b96bcad992b992514b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 28 Dec 2024 23:30:23 +1100 Subject: [PATCH 106/342] restructuring new docs --- .../{howto => examples}/real-example.ipynb | 0 .../source/explanation/conditional-lazy.rst | 4 +- .../source/explanation/hashing-caching.rst | 4 +- .../source/howto/create-task-package.ipynb | 2 +- new-docs/source/index.rst | 42 +++++++++++++++--- .../source/tutorial/advanced-execution.ipynb | 44 +++++++++++++++++++ ...{execution.ipynb => getting-started.ipynb} | 2 +- new-docs/source/tutorial/python.ipynb | 2 +- new-docs/source/tutorial/shell.ipynb | 2 +- new-docs/source/tutorial/tst.py | 27 ------------ new-docs/source/tutorial/workflow.ipynb | 16 ++++--- 11 files changed, 100 insertions(+), 45 deletions(-) rename new-docs/source/{howto => examples}/real-example.ipynb (100%) create mode 100644 new-docs/source/tutorial/advanced-execution.ipynb rename new-docs/source/tutorial/{execution.ipynb => getting-started.ipynb} (98%) delete mode 100644 new-docs/source/tutorial/tst.py diff --git a/new-docs/source/howto/real-example.ipynb b/new-docs/source/examples/real-example.ipynb similarity index 100% rename from new-docs/source/howto/real-example.ipynb rename to new-docs/source/examples/real-example.ipynb diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst index 3ed2c600ed..0c30be1d1c 100644 --- a/new-docs/source/explanation/conditional-lazy.rst +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -1,4 +1,4 @@ -Conditional construction -======================== +Conditionals and lazy fields +============================ Work in progress... 
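+
+Until this page is filled out, the key distinction can be sketched briefly. Outputs of
+nodes added with `workflow.add` are "lazy fields", placeholders for values that only
+exist at run time, so they cannot be used in `if` statements while the workflow is
+being constructed. Workflow *inputs*, on the other hand, are known at construction
+time and can be branched on freely. A minimal sketch, assuming the `pydra.design` API
+used in the workflow tutorial (`Add`, `Double` and `ToggleDouble` are illustrative
+names only, not part of Pydra):
+
+.. code-block:: python
+
+    from pydra.design import python, workflow
+
+
+    @python.define
+    def Add(a: float, b: float) -> float:
+        return a + b
+
+
+    @python.define
+    def Double(x: float) -> float:
+        return x * 2
+
+
+    @workflow.define
+    def ToggleDouble(a: float, b: float, double: bool) -> float:
+        add = workflow.add(Add(a=a, b=b))
+        # `add.out` is a lazy field, so it cannot be tested in an `if` statement here.
+        # Branching on the workflow input `double` is fine, because its value is known
+        # when the constructor function runs.
+        if double:
+            node = workflow.add(Double(x=add.out))
+        else:
+            node = add
+        return node.out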
diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst index ce59e448cf..d03d4b042c 100644 --- a/new-docs/source/explanation/hashing-caching.rst +++ b/new-docs/source/explanation/hashing-caching.rst @@ -1,4 +1,4 @@ -Caching -======= +Caching and hashing +=================== Work in progress.... diff --git a/new-docs/source/howto/create-task-package.ipynb b/new-docs/source/howto/create-task-package.ipynb index 36777b7084..39aec84713 100644 --- a/new-docs/source/howto/create-task-package.ipynb +++ b/new-docs/source/howto/create-task-package.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Creating a task package" + "# Create a task package" ] }, { diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index bd19a73143..17a5b0e524 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -27,35 +27,67 @@ those commands installed on the execution machine, or use software containers (e.g., Docker or Singularity) to run them. +Tutorials +--------- + +* :ref:`Getting started` +* :ref:`Execution options` +* :ref:`Python-tasks` +* :ref:`Shell-tasks` +* :ref:`Workflows` + +Examples +-------- + +* :ref:`Real-world example` + +How-to Guides +------------- + +* :ref:`Create a task package` +* :ref:`Port interfaces from Nipype` + Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` +.. toctree:: + :maxdepth: 2 + :hidden: + + tutorial/getting-started + tutorial/advanced-execution .. toctree:: :maxdepth: 2 - :caption: Tutorials + :caption: Design :hidden: - tutorial/execution tutorial/python tutorial/shell tutorial/workflow + +.. toctree:: + :maxdepth: 2 + :caption: Examples + :hidden: + + examples/real-example + .. toctree:: :maxdepth: 2 - :caption: How-to Guides + :caption: How-to :hidden: - howto/real-example howto/create-task-package howto/port-from-nipype .. toctree:: :maxdepth: 2 - :caption: Explanation + :caption: In-depth :hidden: explanation/design-approach diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb new file mode 100644 index 0000000000..59bafe2fd6 --- /dev/null +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -0,0 +1,44 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Execution options" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Execution plugins" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cache locations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environments" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/execution.ipynb b/new-docs/source/tutorial/getting-started.ipynb similarity index 98% rename from new-docs/source/tutorial/execution.ipynb rename to new-docs/source/tutorial/getting-started.ipynb index 7250f5ab3c..c0f362c029 100644 --- a/new-docs/source/tutorial/execution.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Running tasks\n", + "# Getting started\n", "\n", "A *Task* is the basic runnable component in Pydra, and can execute either a Python function,\n", "shell command or workflows consisting of combinations of all three types." 
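The three kinds of task referred to above (Python functions, shell commands and
workflows) are all defined, parameterised and executed in the same way. As a minimal
sketch of the first kind, assuming the `pydra.design` API introduced in the
Python-tasks tutorial (`Double` is an illustrative name, not part of Pydra):

.. code-block:: python

    from pydra.design import python


    @python.define
    def Double(a: int) -> int:
        """Toy task definition used only for illustration."""
        return a * 2


    # Parameterise the task definition, then call the resulting object to execute it
    task = Double(a=3)
    result = task()
    print(result.output)  # prints the outputs object, e.g. DoubleOutputs(out=6)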
diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index 1ea45100ac..d251a93dac 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Python-task design" + "# Python-tasks" ] }, { diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index bf8c852c71..5416998b67 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Shell-task design" + "# Shell-tasks" ] }, { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py deleted file mode 100644 index 43a263f67a..0000000000 --- a/new-docs/source/tutorial/tst.py +++ /dev/null @@ -1,27 +0,0 @@ -from pydra.design import workflow, python - - -# Example python task specifications -@python.define -def Add(a, b): - return a + b - - -@python.define -def Mul(a, b): - return a * b - - -@python.define -def Sum(x: list[float]) -> float: - return sum(x) - - -@workflow.define -def MySplitWorkflow(a: list[int], b: list[float]) -> list[float]: - # Multiply over all combinations of the elements of a and b, then combine the results - # for each a element into a list over each b element - mul = workflow.add(Mul()).split(x=a, y=b).combine("x") - # Sume the multiplications across all all b elements for each a element - sum = workflow.add(Sum(x=mul.out)) - return sum.out diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 37d116d631..e6c812e136 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Workflow design\n", + "# Workflows\n", "\n", "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", "Workflow specifications are dataclasses, which interchangeable with Python and shell tasks\n", @@ -208,7 +208,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "from pydra.engine.specs import WorkflowSpec, WorkflowOutputs\n", "\n", "def a_converter(value):\n", @@ -376,14 +375,14 @@ "metadata": {}, "source": [ "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", - "placeholders see [Conditional construction](../explanation/conditional-lazy.html)" + "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Typing\n", + "## Type-checking between nodes\n", "\n", "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", "when values or upstream outputs are assigned to task specification inputs.\n", @@ -430,7 +429,7 @@ " \"ffmpeg -i -i \"\n", " \"-filter_complex \"\n", " )(\n", - " in_video=input_video,\n", + " in_video=input_video, # This is OK because in_video is typed Any\n", " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", " filter=\"overlay={}:{}\".format(*watermark_dims),\n", " ),\n", @@ -449,6 +448,13 @@ " return handbrake.output_video" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, From 03546ed206146dc854ed0a50de898591fb013937 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 14:47:52 +1100 Subject: [PATCH 107/342] more restructuring of new docs --- ...eal-example.ipynb => t1w-preprocess.ipynb} | 2 +- new-docs/source/explanation/environments.rst | 4 ++ new-docs/source/explanation/provenance.rst | 4 -- new-docs/source/index.rst | 37 +++++++++++++------ .../source/tutorial/advanced-execution.ipynb | 19 ++++++++-- 5 files changed, 46 insertions(+), 20 deletions(-) rename new-docs/source/examples/{real-example.ipynb => t1w-preprocess.ipynb} (96%) create mode 100644 new-docs/source/explanation/environments.rst delete mode 100644 new-docs/source/explanation/provenance.rst diff --git a/new-docs/source/examples/real-example.ipynb b/new-docs/source/examples/t1w-preprocess.ipynb similarity index 96% rename from new-docs/source/examples/real-example.ipynb rename to new-docs/source/examples/t1w-preprocess.ipynb index b2aa082a3b..3c1271d26a 100644 --- a/new-docs/source/examples/real-example.ipynb +++ b/new-docs/source/examples/t1w-preprocess.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Real-world example" + "# T1w MRI preprocessing" ] }, { diff --git a/new-docs/source/explanation/environments.rst b/new-docs/source/explanation/environments.rst new file mode 100644 index 0000000000..778b2bc35a --- /dev/null +++ b/new-docs/source/explanation/environments.rst @@ -0,0 +1,4 @@ +Containers and environments +=========================== + +Work in progress... diff --git a/new-docs/source/explanation/provenance.rst b/new-docs/source/explanation/provenance.rst deleted file mode 100644 index 6f97a3ed96..0000000000 --- a/new-docs/source/explanation/provenance.rst +++ /dev/null @@ -1,4 +0,0 @@ -Provenance -========== - -Work in progress.... diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 17a5b0e524..e95ea081bc 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -9,29 +9,38 @@ implement scientific workflows that use a mix of shell commands and Python funct Pydra is developed as an open-source project in the neuroimaging community, but it is designed as a general-purpose dataflow engine to support any scientific domain. -See the :ref:`Design philosophy` for more an explanation of the design -philosophy and goals of Pydra. +See :ref:`Design philosophy` for more an explanation of the design of Pydra. Installation ------------ Pydra itself is a pure-Python package, which has only a handful of dependencies, -therefore, it is straightforward to install via pip +therefore, it is straightforward to install via pip for Python >= 3.11 .. code-block:: bash $ pip install pydra -Of course, if you use Pydra to execute shell-commands tools, you will need to either have -those commands installed on the execution machine, or use software containers -(e.g., Docker or Singularity) to run them. +Pre-designed tasks are available under the `pydra.tasks.*` package namespace. These tasks +are implemented within separate packages that are typically specific to a given shell-command toolkit such as FSL_, AFNI_ or ANTs_, +or a collection of related tasks/workflows (e.g. `niworkflows`_). Pip can be used to +install these packages as well: + + +.. 
code-block:: bash + + $ pip install pydra-fsl pydra-ants + +Of course, if you use Pydra to execute commands within toolkits, you will need to +either have those commands installed on the execution machine, or use containers +environments (see [Environments](../explanation/environments.html)) to run them. Tutorials --------- * :ref:`Getting started` -* :ref:`Execution options` +* :ref:`Advanced execution` * :ref:`Python-tasks` * :ref:`Shell-tasks` * :ref:`Workflows` @@ -39,7 +48,7 @@ Tutorials Examples -------- -* :ref:`Real-world example` +* :ref:`T1w MRI preprocessing` How-to Guides ------------- @@ -55,6 +64,7 @@ Indices and tables .. toctree:: :maxdepth: 2 + :caption: Execution :hidden: tutorial/getting-started @@ -75,7 +85,7 @@ Indices and tables :caption: Examples :hidden: - examples/real-example + examples/t1w-preprocess .. toctree:: :maxdepth: 2 @@ -92,10 +102,10 @@ Indices and tables explanation/design-approach explanation/splitting-combining + explanation/conditional-lazy explanation/typing explanation/hashing-caching - explanation/conditional-lazy - explanation/provenance + explanation/environments .. toctree:: @@ -104,3 +114,8 @@ Indices and tables :hidden: reference/api + +.. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL +.. _ANTs: http://stnava.github.io/ANTs/ +.. _AFNI: https://afni.nimh.nih.gov/ +.. _niworkflows: https://niworkflows.readthedocs.io/en/latest/ diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 59bafe2fd6..1105a8f318 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -4,28 +4,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Execution options" + "# Advanced execution" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Execution plugins" + "## Plugins" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Cache locations" + "## Caching results\n", + "\n", + "See [Caching and hashing](../explanation/hashing-caching.html) for more details." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Environments" + "## Environments (containers)\n", + "\n", + "See [Containers and Environments](../explanation/environments.rst) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provenance" ] }, { From 35641b1c6a2ffeccc1745227e69fd0d8af8a4ef9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 19:07:00 +1100 Subject: [PATCH 108/342] writing getting-starting notebook --- .../source/tutorial/getting-started.ipynb | 162 +++++++++++++++--- 1 file changed, 140 insertions(+), 22 deletions(-) diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index c0f362c029..1e91d0960a 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -6,42 +6,160 @@ "source": [ "# Getting started\n", "\n", - "A *Task* is the basic runnable component in Pydra, and can execute either a Python function,\n", - "shell command or workflows consisting of combinations of all three types." + "## Running your first task\n", + "\n", + "The basic runnable component of Pydra is a *task*. Tasks are conceptually similar to\n", + "functions, in that they take inputs, process them and then return results. 
However,\n", + "unlike functions, tasks are parameterised before they are executed in a separate step.\n", + "This enables parameterised tasks to be linked together into workflows that are checked for\n", + "errors before they are executed, and modular execution workers and environments to specified\n", + "independently of the task being performed.\n", + "\n", + "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", + "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). Pre-define task definitions are run by\n", + "\n", + "* importing the class from the `pydra.tasks.*` package it is in\n", + "* instantiate the class with the parameters of the task\n", + "* \"call\" resulting object to execute it as you would a function (i.e. with the `my_task(...)`)\n", + "\n", + "To demonstrate with a toy example, of loading a JSON file with the `pydra.tasks.common.LoadJson` task, this we first create an example JSON file" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Sample JSON file created at '0UAqFzWsDK4FrUMp48Y3tT3Q.json' with contents: {\"a\": true, \"b\": \"two\", \"c\": 3, \"d\": [7, 0.5598136790149003, 6]}\n", - "Loaded contents: {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n" - ] - } - ], + "outputs": [], "source": [ - "from fileformats.application import Json\n", - "from pydra.tasks.common import LoadJson\n", + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "import json\n", "\n", - "# Create a sample JSON file to test\n", - "json_file = Json.sample()\n", + "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n", "\n", - "# Print the path of the sample JSON file and its contents for reference\n", - "print(f\"Sample JSON file created at {json_file.name!r} with contents: {json_file.read_text()}\")\n", + "test_dir = Path(mkdtemp())\n", + "json_file = test_dir / \"test.json\"\n", + "with open(json_file, \"w\") as f:\n", + " json.dump(JSON_CONTENTS, f)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can load the JSON contents back from the file using the `LoadJson` task definition\n", + "class" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# Import the task definition\n", + "from pydra.tasks.common import LoadJson\n", "\n", - "# Parameterise the task specification to load the JSON file\n", + "# Instantiate the task definition, providing the JSON file we want to load\n", "load_json = LoadJson(file=json_file)\n", "\n", "# Run the task to load the JSON file\n", "result = load_json()\n", "\n", - "# Print the output interface of the of the task (LoadJson.Outputs)\n", - "print(f\"Loaded contents: {result.output.out}\")" + "# Access the loaded JSON output contents and check they match original\n", + "assert result.output.out == JSON_CONTENTS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Iterating over inputs\n", + "\n", + "It is straightforward to apply the same operation over a set of inputs using the `split()`\n", + "method. 
For example, if we wanted to re-grid all the NIfTI images stored in a directory,\n", + "such as the sample ones generated by the code below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.medimage import Nifti\n", + "\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "\n", + "for i in range(10):\n", + " Nifti.sample(nifti_dir, seed=i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then we can by importing the `MrGrid` shell-command task from the `pydra-mrtrix3` package\n", + "and then splitting over the list of files in the directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.tasks.mrtrix3 import MrGrid\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "mrgrid = MrGrid(voxel=0.5).split(input=nifti_dir.iterdir())\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "result = mrgrid()\n", + "\n", + "# Print the locations of the output files\n", + "print(\"\\n\".join(str(p) for p in result.output.output))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is also possible to iterate over inputs in pairs, if for example you wanted to use\n", + "different voxel sizes for different images, both the list of images and the voxel sizes\n", + "are passed to the `split()` method and their combination is specified by a tuple \"splitter\"\n", + "(see [Splitting and combining](../explanation/splitting-combining.html) for more details\n", + "on splitters)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define a list of voxel sizes to resample the NIfTI files to, must be the same length\n", + "# as the number of NIfTI files\n", + "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", + "\n", + "mrgrid_varying_sizes = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Run the task to resample all NIfTI files with different voxel sizes\n", + "result = mrgrid()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cache directories\n", + "\n", + "When a task runs, a hash is generated by the combination of all the inputs to the task and the task to be run." 
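+    "\n",
+    "If the same task definition is run again with identical inputs, the hashes will match\n",
+    "and previously stored results can be reused rather than recomputed. A minimal sketch of\n",
+    "what this might look like, assuming a `cache_dir` argument similar to the one accepted\n",
+    "by current Pydra releases (the exact keyword may change as the new API evolves):\n",
+    "\n",
+    "```python\n",
+    "cache_dir = test_dir / \"cache\"\n",
+    "\n",
+    "# First call executes the commands and stores the results under `cache_dir`\n",
+    "result = mrgrid(cache_dir=cache_dir)  # hypothetical keyword\n",
+    "\n",
+    "# A second call with identical inputs produces the same hashes, so the cached\n",
+    "# results are reused instead of re-running the commands\n",
+    "result = mrgrid(cache_dir=cache_dir)  # hypothetical keyword\n",
+    "```\n",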
] }, { From ffdfce11cf5aae52014fb1b5dcd9f0e254b9916d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 19:07:34 +1100 Subject: [PATCH 109/342] renamed Spec and specification to Def and definition --- docs/changes.rst | 2 +- docs/components.rst | 6 +- docs/input_spec.rst | 26 +- docs/output_spec.rst | 18 +- new-docs/source/index.rst | 22 +- .../source/tutorial/advanced-execution.ipynb | 9 +- new-docs/source/tutorial/python.ipynb | 594 +++++----- new-docs/source/tutorial/shell.ipynb | 1044 ++++++++--------- new-docs/source/tutorial/workflow.ipynb | 966 +++++++-------- pydra/design/base.py | 36 +- pydra/design/boutiques.py | 8 +- pydra/design/python.py | 14 +- pydra/design/shell.py | 20 +- pydra/design/tests/test_python.py | 162 +-- pydra/design/tests/test_shell.py | 18 +- pydra/design/tests/test_workflow.py | 8 +- pydra/design/workflow.py | 24 +- pydra/engine/audit.py | 2 +- pydra/engine/core.py | 12 +- pydra/engine/helpers.py | 8 +- pydra/engine/specs.py | 30 +- pydra/engine/state.py | 2 +- pydra/engine/task.py | 12 +- pydra/engine/tests/test_helpers_file.py | 4 +- pydra/engine/tests/test_nipype1_convert.py | 8 +- pydra/engine/tests/test_shelltask.py | 166 +-- .../engine/tests/test_shelltask_inputspec.py | 104 +- pydra/engine/tests/test_singularity.py | 24 +- pydra/engine/tests/test_specs.py | 26 +- pydra/engine/tests/test_task.py | 38 +- pydra/engine/tests/test_workflow.py | 10 +- pydra/engine/workers.py | 6 +- pydra/engine/workflow/base.py | 28 +- pydra/engine/workflow/node.py | 14 +- pydra/utils/tests/utils.py | 4 +- pydra/utils/typing.py | 4 +- 36 files changed, 1748 insertions(+), 1731 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 4e23840e90..c3d2814f85 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -108,7 +108,7 @@ Release Notes --- * big changes in ``ShellTask``, ``DockerTask`` and ``SingularityTask`` - * customized input specification and output specification for ``Task``\s + * customized input definition and output definition for ``Task``\s * adding singularity checks to Travis CI * binding all input files to the container * changes in ``Workflow`` diff --git a/docs/components.rst b/docs/components.rst index d4928e82c6..46dcacbe37 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -66,7 +66,7 @@ Shell Command Tasks The *Task* can accommodate more complex shell commands by allowing the user to customize inputs and outputs of the commands. One can generate an input - specification to specify names of inputs, positions in the command, types of + definition to specify names of inputs, positions in the command, types of the inputs, and other metadata. As a specific example, FSL's BET command (Brain Extraction Tool) can be called on the command line as: @@ -76,7 +76,7 @@ Shell Command Tasks bet input_file output_file -m Each of the command argument can be treated as a named input to the - ``ShellCommandTask``, and can be included in the input specification. + ``ShellCommandTask``, and can be included in the input definition. 
As shown next, even an output is specified by constructing the *out_file* field form a template: @@ -97,7 +97,7 @@ Shell Command Tasks ( "mask", bool, { "help_string": "create binary mask", "argstr": "-m", } ) ], - bases=(ShellSpec,) ) + bases=(ShellDef,) ) ShellCommandTask(executable="bet", input_spec=bet_input_spec) diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 2940c17820..18679d5dee 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -5,7 +5,7 @@ Input Specification As it was mentioned in :ref:`shell_command_task`, the user can customize the input and output for the `ShellCommandTask`. -In this section, more examples of the input specification will be provided. +In this section, more examples of the input definition will be provided. Let's start from the previous example: @@ -27,29 +27,29 @@ Let's start from the previous example: ( "mask", bool, { "help_string": "create binary mask", "argstr": "-m", } ) ], - bases=(ShellSpec,) ) + bases=(ShellDef,) ) ShellCommandTask(executable="bet", input_spec=bet_input_spec) -In order to create an input specification, a new `SpecInfo` object has to be created. +In order to create an input definition, a new `SpecInfo` object has to be created. The field `name` specifies the type of the spec and it should be always "Input" for -the input specification. -The field `bases` specifies the "base specification" you want to use (can think about it as a -`parent class`) and it will usually contains `ShellSpec` only, unless you want to build on top of -your other specification (this will not be cover in this section). +the input definition. +The field `bases` specifies the "base definition" you want to use (can think about it as a +`parent class`) and it will usually contains `ShellDef` only, unless you want to build on top of +your other definition (this will not be cover in this section). The part that should be always customised is the `fields` part. -Each element of the `fields` is a separate input field that is added to the specification. +Each element of the `fields` is a separate input field that is added to the definition. In this example, three-elements tuples - with name, type and dictionary with additional information - are used. But this is only one of the supported syntax, more options will be described below. -Adding a New Field to the Spec +Adding a New Field to the Def ------------------------------ -Pydra uses `attr` classes to represent the input specification, and the full syntax for each field +Pydra uses `attr` classes to represent the input definition, and the full syntax for each field is: .. code-block:: python @@ -152,15 +152,15 @@ In the example we used multiple keys in the metadata dictionary including `help_ `output_file_template` (`str`): If provided, the field is treated also as an output field and it is added to the output spec. The template can use other fields, e.g. `{file1}`. - Used in order to create an output specification. + Used in order to create an output definition. `output_field_name` (`str`, used together with `output_file_template`) If provided the field is added to the output spec with changed name. - Used in order to create an output specification. + Used in order to create an output definition. `keep_extension` (`bool`, default: `True`): A flag that specifies if the file extension should be removed from the field value. - Used in order to create an output specification. + Used in order to create an output definition. 
`readonly` (`bool`, default: `False`): If `True` the input field can't be provided by the user but it aggregates other input fields diff --git a/docs/output_spec.rst b/docs/output_spec.rst index 2e0907076b..183e273339 100644 --- a/docs/output_spec.rst +++ b/docs/output_spec.rst @@ -5,7 +5,7 @@ Output Specification As it was mentioned in :ref:`shell_command_task`, the user can customize the input and output for the `ShellCommandTask`. -In this section, the output specification will be covered. +In this section, the output definition will be covered. Instead of using field with `output_file_template` in the customized `input_spec` to specify an output field, @@ -29,7 +29,7 @@ a customized `output_spec` can be used, e.g.: ), ) ], - bases=(ShellOutSpec,), + bases=(ShellOutDef,), ) ShellCommandTask(executable=executable, @@ -37,18 +37,18 @@ a customized `output_spec` can be used, e.g.: -Similarly as for `input_spec`, in order to create an output specification, +Similarly as for `input_spec`, in order to create an output definition, a new `SpecInfo` object has to be created. The field `name` specifies the type of the spec and it should be always "Output" for -the output specification. -The field `bases` specifies the "base specification" you want to use (can think about it as a -`parent class`) and it will usually contains `ShellOutSpec` only, unless you want to build on top of -your other specification (this will not be cover in this section). +the output definition. +The field `bases` specifies the "base definition" you want to use (can think about it as a +`parent class`) and it will usually contains `ShellOutDef` only, unless you want to build on top of +your other definition (this will not be cover in this section). The part that should be always customised is the `fields` part. -Each element of the `fields` is a separate output field that is added to the specification. +Each element of the `fields` is a separate output field that is added to the definition. In this example, a three-elements tuple - with name, type and dictionary with additional information - is used. -See :ref:`Input Specification section` for other recognized syntax for specification's fields +See :ref:`Input Specification section` for other recognized syntax for definition's fields and possible types. diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index e95ea081bc..ca804f0839 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -3,11 +3,21 @@ Pydra ===== -Pydra is a new lightweight dataflow engine written in Python, which provides a simple way to -implement scientific workflows that use a mix of shell commands and Python functions. - -Pydra is developed as an open-source project in the neuroimaging community, -but it is designed as a general-purpose dataflow engine to support any scientific domain. +Pydra is a lightweight, Python 3.11+ dataflow engine for computational graph construction, +manipulation, and distributed execution. Designed as a successor to created for [Nipype](https://github.com/nipy/nipype), +Pydra is a general-purpose engine that supports analytics in any scientific domain. +Pydra helps build reproducible, scalable, reusable, and fully automated, provenance +tracked scientific workflows that combine Python functions and shell commands. + +The power of Pydra lies in ease of workflow creation and execution for complex +multiparameter map-reduce operations, and the use of global cache. 
+ +Pydra's key features are: +- Modular execution backends (see [Advanced execution](../tutorial/advanced-execution.html)) +- Map-reduce like semantics (see [Splitting and combining](../explanation/splitting-combining.html)) +- Global cache support to reduce recomputation (see [Hashing and caching](../explanation/hashing-caching.html)) +- Support for execution of Tasks in containerized environments (see [Environments](../explanation/environments.html)) +- Strong type-checking and type-hinting support (see [Typing](../explanation/typing.html)) See :ref:`Design philosophy` for more an explanation of the design of Pydra. @@ -64,7 +74,7 @@ Indices and tables .. toctree:: :maxdepth: 2 - :caption: Execution + :caption: Task execution :hidden: tutorial/getting-started diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 1105a8f318..63a7673daf 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -11,7 +11,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Plugins" + "## Workers\n", + "\n", + "Pydra supports several workers with which to execute tasks\n", + "\n", + "- `ConcurrentFutures`\n", + "- `SLURM`\n", + "- `Dask` (experimental)\n", + "- `Serial` (for debugging)" ] }, { diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index d251a93dac..a80458a06e 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -1,299 +1,299 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Python-tasks" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "funcOutputs(out=2.0)\n" - ] - } - ], - "source": [ - "from pydra.design import python\n", - "\n", - "\n", - "def func(a: int) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * 2\n", - "\n", - "SampleSpec = python.define(func)\n", - "\n", - "spec = SampleSpec(a=1)\n", - "result = spec()\n", - "print(result.output)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### With typing" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def func(a: int, k: float = 2.0) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * k\n", - "\n", - "SampleSpec = python.define(func)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Augment with explicit inputs and outputs\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from decimal import Decimal\n", - "\n", - "def func(a: int) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * 2\n", - "\n", - "SampleSpec = python.define(\n", - " func,\n", - " inputs={\"a\": python.arg(help_string=\"The argument to be doubled\")},\n", - " outputs={\"b\": python.out(help_string=\"the doubled output\", type=Decimal)},\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Decorated_function" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "# Note we use CamelCase as the function is translated to a class\n", - "\n", - "@python.define(outputs=[\"c\", \"d\"])\n", - "def SampleSpec(a: int, b: 
float) -> tuple[float, float]:\n", - " \"\"\"Sample function for testing\"\"\"\n", - " return a + b, a * b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Pull helps from docstring" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "\n", - "@python.define(outputs=[\"c\", \"d\"])\n", - "def SampleSpec(a: int, b: float) -> tuple[float, float]:\n", - " \"\"\"Sample function for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - "\n", - " Returns:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - " return a + b, a * b\n", - "\n", - "pprint(fields_dict(SampleSpec))\n", - "pprint(fields_dict(SampleSpec.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dataclass form" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" - ] - } - ], - "source": [ - "\n", - "@python.define\n", - "class SampleSpec:\n", - " \"\"\"Sample class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0\n", - "\n", - " class Outputs:\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " 
return a + b, a * b\n", - "\n", - "pprint(fields_dict(SampleSpec))\n", - "pprint(fields_dict(SampleSpec.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Canonical form (to work with static type-checking)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" - ] - } - ], - "source": [ - "from pydra.engine.specs import PythonSpec, PythonOutputs\n", - "\n", - "@python.define\n", - "class SampleSpec(PythonSpec[\"SampleSpec.Outputs\"]):\n", - " \"\"\"Sample class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float\n", - "\n", - " @python.outputs\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a * b\n", - "\n", - "pprint(fields_dict(SampleSpec))\n", - "pprint(fields_dict(SampleSpec.Outputs))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python-tasks" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "funcOutputs(out=2.0)\n" + ] + } + ], + "source": [ + "from pydra.design import python\n", + "\n", + "\n", + "def func(a: int) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * 2\n", + "\n", + "SampleDef = python.define(func)\n", + "\n", + "spec = SampleDef(a=1)\n", + "result = spec()\n", + "print(result.output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With typing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "def func(a: int, k: float = 2.0) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * k\n", 
+ "\n", + "SampleDef = python.define(func)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Augment with explicit inputs and outputs\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from decimal import Decimal\n", + "\n", + "def func(a: int) -> float:\n", + " \"\"\"Sample function with inputs and outputs\"\"\"\n", + " return a * 2\n", + "\n", + "SampleDef = python.define(\n", + " func,\n", + " inputs={\"a\": python.arg(help_string=\"The argument to be doubled\")},\n", + " outputs={\"b\": python.out(help_string=\"the doubled output\", type=Decimal)},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Decorated_function" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# Note we use CamelCase as the function is translated to a class\n", + "\n", + "@python.define(outputs=[\"c\", \"d\"])\n", + "def SampleDef(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a * b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Pull helps from docstring" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "@python.define(outputs=[\"c\", \"d\"])\n", + "def SampleDef(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + "\n", + " Returns:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(SampleDef))\n", + "pprint(fields_dict(SampleDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataclass form" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, 
copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + ] + } + ], + "source": [ + "\n", + "@python.define\n", + "class SampleDef:\n", + " \"\"\"Sample class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0\n", + "\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(SampleDef))\n", + "pprint(fields_dict(SampleDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Canonical form (to work with static type-checking)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + ] + } + ], + "source": [ + "from pydra.engine.specs import PythonDef, PythonOutputs\n", + "\n", + "@python.define\n", + "class SampleDef(PythonDef[\"SampleDef.Outputs\"]):\n", + " \"\"\"Sample class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float\n", + "\n", + " @python.outputs\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(SampleDef))\n", + "pprint(fields_dict(SampleDef.Outputs))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", 
+ "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index 5416998b67..fa68492374 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -1,524 +1,524 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Shell-tasks" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Command-line templates\n", - "\n", - "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import shell\n", - "\n", - "Cp = shell.define(\"cp \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. Outputs are differentiated by the `out|` prefix.\n", - "\n", - "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt\n", - "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt'): 'Contents to be copied'\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from tempfile import mkdtemp\n", - "\n", - "# Make a test file to copy\n", - "test_dir = Path(mkdtemp())\n", - "test_file = test_dir / \"in.txt\"\n", - "with open(test_file, \"w\") as f:\n", - " f.write(\"Contents to be copied\")\n", - "\n", - "# Parameterise the task spec\n", - "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", - "\n", - "# Print the cmdline to be run to double check\n", - "print(f\"Command-line to be run: {cp.cmdline}\")\n", - "\n", - "# Run the shell-comand task\n", - "result = cp()\n", - "\n", - "print(\n", - " f\"Contents of copied file ('{result.output.destination}'): \"\n", - " f\"'{Path(result.output.destination).read_text()}'\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If paths to output files are not provided in the parameterisation, it will default to the name of the field" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /Users/tclose/git/workflows/pydra/docs/source/tutorial/destination\n" - ] - } - ], - "source": [ - "cp = Cp(in_file=test_file)\n", - "print(cp.cmdline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Specifying types\n", - "\n", - "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. 
However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", - "\n", - "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trim-png /mock/png.png /path/to/output.png\n" - ] - } - ], - "source": [ - "from fileformats.image import Png\n", - "\n", - "TrimPng = shell.define(\"trim-png \")\n", - "\n", - "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", - "\n", - "print(trim_png.cmdline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Flags and options\n", - "\n", - "Command line flags can also be added to the shell template, either the single or double hyphen form. The field template name immediately following the flag will be associate with that flag.\n", - "\n", - "If there is no space between the flag and the field template, then the field is assumed to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", - "\n", - "If a field is optional, the field template should end with a `?`. Tuple fields are specified by comma separated types.\n", - "\n", - "Varargs are specified by the type followed by an ellipsis, e.g. ``" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=str | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "\n", - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", - "\n", - "pprint(fields_dict(Cp))\n", - "pprint(fields_dict(Cp.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Defaults\n", - "\n", - "Defaults can be specified by appending them to the 
field template after `=`" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'--int-arg' default: 99\n" - ] - } - ], - "source": [ - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", - "\n", - "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Additional field attributes\n", - "\n", - "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help_string='', 
requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [ - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " inputs={\"recursive\": shell.arg(\n", - " help_string=(\n", - " \"If source_file designates a directory, cp copies the directory and \"\n", - " \"the entire subtree connected at that point.\"\n", - " )\n", - " )},\n", - " outputs={\n", - " \"out_dir\": shell.outarg(position=-2),\n", - " \"out_file\": shell.outarg(position=-1),\n", - " },\n", - " )\n", - "\n", - "\n", - "pprint(fields_dict(Cp))\n", - "pprint(fields_dict(Cp.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Callable outptus\n", - "\n", - "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. 
Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Size of the output file is: 256\n" - ] - } - ], - "source": [ - "import os\n", - "from pydra.design import shell\n", - "from pathlib import Path\n", - "from fileformats.generic import File\n", - "\n", - "# Arguments to the callable function can be one of \n", - "def get_file_size(out_file: Path) -> int:\n", - " \"\"\"Calculate the file size\"\"\"\n", - " result = os.stat(out_file)\n", - " return result.st_size\n", - "\n", - "\n", - "CpWithSize = shell.define(\n", - " \"cp \",\n", - " outputs={\"out_file_size\": get_file_size},\n", - ")\n", - "\n", - "# Parameterise the task spec\n", - "cp_with_size = CpWithSize(in_file=File.sample())\n", - "\n", - "# Run the command\n", - "result = cp_with_size()\n", - "\n", - "\n", - "print(f\"Size of the output file is: {result.output.out_file_size}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The callable can take any combination of the following arguments, which will be passed\n", - "to it when it is called\n", - "\n", - "* field: the `Field` object to be provided a value, useful when writing generic callables\n", - "* output_dir: a `Path` object referencing the working directory the command was run within\n", - "* inputs: a dictionary containing all the resolved inputs to the task\n", - "* stdout: the standard output stream produced by the command\n", - "* stderr: the standard error stream produced by the command\n", - "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dataclass form\n", - "\n", - "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using `shell.define` as a decorator." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_file': out(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=None),\n", - " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [ - "from fileformats.generic import FsObject, Directory\n", - "from pydra.utils.typing import MultiInputObj\n", - "\n", - "@shell.define\n", - "class CpWithSize:\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\")\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " class Outputs:\n", - " out_file: File\n", - " out_file_size: int = shell.out(callable=get_file_size)\n", - "\n", - "\n", - "pprint(fields_dict(CpWithSize))\n", - "pprint(fields_dict(CpWithSize.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To make workflows that use the interface type-checkable, the canonical form of a 
shell\n", - "task dataclass should inherit from `shell.Spec` parameterized by its nested Outputs class,\n", - "and the `Outputs` nested class should inherit from `shell.Outputs`." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.engine.specs import ShellSpec, ShellOutputs\n", - "\n", - "@shell.define\n", - "class Cp(ShellSpec[\"Cp.Outputs\"]):\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " @shell.outputs\n", - " class Outputs(ShellOutputs):\n", - " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dynamic specifications\n", - "\n", - "In some cases, it is required to generate the specification for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", - "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help_string='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" - ] - } - ], - "source": [ - "from fileformats.generic import File\n", - "from pydra.engine.helpers import list_fields\n", - "\n", - "ACommand = shell.define(\n", - " \"a-command\",\n", - " inputs={\n", - " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-2)\n", - " },\n", - " outputs={\n", - " \"out_file\": shell.outarg(\n", - " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", - " ),\n", - " \"out_file_size\": {\n", - " \"type\": int,\n", - " \"help_string\": \"size of the output directory\",\n", - " \"callable\": get_file_size,\n", - " }\n", - " },\n", - ")\n", - "\n", - "\n", - "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", - "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Shell-tasks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Command-line templates\n", + "\n", + "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "\n", + "Cp = shell.define(\"cp \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. 
Outputs are differentiated by the `out|` prefix.\n", + "\n", + "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt\n", + "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt'): 'Contents to be copied'\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "\n", + "# Make a test file to copy\n", + "test_dir = Path(mkdtemp())\n", + "test_file = test_dir / \"in.txt\"\n", + "with open(test_file, \"w\") as f:\n", + " f.write(\"Contents to be copied\")\n", + "\n", + "# Parameterise the task spec\n", + "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", + "\n", + "# Print the cmdline to be run to double check\n", + "print(f\"Command-line to be run: {cp.cmdline}\")\n", + "\n", + "# Run the shell-command task\n", + "result = cp()\n", + "\n", + "print(\n", + " f\"Contents of copied file ('{result.output.destination}'): \"\n", + " f\"'{Path(result.output.destination).read_text()}'\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If paths to output files are not provided in the parameterisation, they will default to the name of the field" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /Users/tclose/git/workflows/pydra/docs/source/tutorial/destination\n" + ] + } + ], + "source": [ + "cp = Cp(in_file=test_file)\n", + "print(cp.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Specifying types\n", + "\n", + "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", + "\n", + "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "trim-png /mock/png.png /path/to/output.png\n" + ] + } + ], + "source": [ + "from fileformats.image import Png\n", + "\n", + "TrimPng = shell.define(\"trim-png \")\n", + "\n", + "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", + "\n", + "print(trim_png.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Flags and options\n", + "\n", + "Command line flags can also be added to the shell template, either the single or double hyphen form. 
The field template name immediately following the flag will be associate with that flag.\n", + "\n", + "If there is no space between the flag and the field template, then the field is assumed to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", + "\n", + "If a field is optional, the field template should end with a `?`. Tuple fields are specified by comma separated types.\n", + "\n", + "Varargs are specified by the type followed by an ellipsis, e.g. ``" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=str | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', 
type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " )\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defaults\n", + "\n", + "Defaults can be specified by appending them to the field template after `=`" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "'--int-arg' default: 99\n" + ] + } + ], + "source": [ + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " )\n", + "\n", + "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Additional field attributes\n", + "\n", + "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', 
type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], + "source": [ + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " inputs={\"recursive\": shell.arg(\n", + " help_string=(\n", + " \"If source_file designates a directory, cp copies the directory and \"\n", + " \"the entire subtree connected at that point.\"\n", + " )\n", + " )},\n", + " outputs={\n", + " \"out_dir\": shell.outarg(position=-2),\n", + " \"out_file\": shell.outarg(position=-1),\n", + " },\n", + " )\n", + "\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Callable outputs\n", + "\n", + "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Size of the output file is: 256\n" + ] + } + ], + "source": [ + "import os\n", + "from pydra.design import shell\n", + "from pathlib import Path\n", + "from fileformats.generic import File\n", + "\n", + "# Arguments to the callable function can be one of \n", + "def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + "\n", + "CpWithSize = shell.define(\n", + " \"cp \",\n", + " outputs={\"out_file_size\": get_file_size},\n", + ")\n", + "\n", + "# Parameterise the task spec\n", + "cp_with_size = CpWithSize(in_file=File.sample())\n", + "\n", + "# Run the command\n", + "result = cp_with_size()\n", + "\n", + "\n", + "print(f\"Size of the output file is: {result.output.out_file_size}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The callable can take any combination of the following arguments, which will be passed\n", + "to it when it is called\n", + "\n", + "* field: the `Field` object to be provided a value, useful when writing generic callables\n", + "* output_dir: a `Path` object referencing the working directory the command was run within\n", + "* inputs: a dictionary containing all the resolved inputs to the task\n", + "* stdout: the standard output stream produced by the command\n", + "* stderr: the standard error stream produced by the command\n", + "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataclass form\n", + "\n", + "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using `shell.define` as a decorator."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_file': out(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=None),\n", + " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + ] + } + ], + "source": [ + "from fileformats.generic import FsObject, Directory\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "@shell.define\n", + "class CpWithSize:\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " class 
Outputs:\n", + " out_file: File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", + "pprint(fields_dict(CpWithSize.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make workflows that use the interface type-checkable, the canonical form of a shell\n", + "task dataclass should inherit from `shell.Def` parameterized by its nested Outputs class,\n", + "and the `Outputs` nested class should inherit from `shell.Outputs`." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "\n", + "@shell.define\n", + "class Cp(ShellDef[\"Cp.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " @shell.outputs\n", + " class Outputs(ShellOutputs):\n", + " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dynamic definitions\n", + "\n", + "In some cases, it is required to generate the definition for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", + "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help_string='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" + ] + } + ], + "source": [ + "from fileformats.generic import File\n", + "from pydra.engine.helpers import list_fields\n", + "\n", + "ACommand = shell.define(\n", + " \"a-command\",\n", + " inputs={\n", + " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-2)\n", + " },\n", + " outputs={\n", + " \"out_file\": shell.outarg(\n", + " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", + " ),\n", + " \"out_file_size\": {\n", + " \"type\": int,\n", + " \"help_string\": \"size of the output directory\",\n", + " \"callable\": get_file_size,\n", + " }\n", + " },\n", + ")\n", + "\n", + "\n", + "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", + "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index e6c812e136..46e14099e7 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -1,485 +1,485 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Workflows\n", - "\n", - "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", - "Workflow specifications are dataclasses, which interchangeable with Python and shell tasks\n", - "specifications and executed in the same way." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Constructor functions\n", - "\n", - "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", - "a \"constructor\" function that generates the workflow. For example, given two task\n", - "specifications, `Add` and `Mul`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import workflow, python\n", - "\n", - "# Example python task specifications\n", - "@python.define\n", - "def Add(a, b):\n", - " return a + b\n", - "\n", - "\n", - "@python.define\n", - "def Mul(a, b):\n", - " return a * b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " we can create a simple workflow specification using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling `workflow.add` function." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def BasicWorkflow(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`workflow.add` returns an \"outputs\" object corresponding to the specification added to the workflow. The fields of the outptus object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n", - "\n", - "It is also possible to define new tasks to add to the workflow inline the constructor and type the inputs and outputs of the workflow." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import shell\n", - "from fileformats import image, video\n", - "\n", - "@workflow.define\n", - "def ShellWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: image.Png,\n", - " watermark_dims: tuple[int, int] = (10, 10),\n", - ") -> video.Mp4:\n", - "\n", - " add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video,\n", - " watermark=watermark,\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " )\n", - " )\n", - " output_video = workflow.add(\n", - " shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - " )(in_video=add_watermark.out_video, width=1280, height=720)\n", - " ).out_video\n", - "\n", - " return output_video # test implicit detection of output name" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Accessing the workflow object\n", - "\n", - "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs=[\"divided\"])\n", - "def Divide(x, y):\n", - " return x / y\n", - "\n", - "\n", - "@workflow.define(outputs=[\"out1\", \"out2\"])\n", - "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n", - " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", - "\n", - " Args:\n", - " a: An integer input\n", - " b: A float input\n", - "\n", - " Returns:\n", - " out1: The first output\n", - " out2: The second output\n", - " \"\"\"\n", - "\n", - " wf = workflow.this()\n", - "\n", - " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", - " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", - " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", - "\n", - " # Alter one of the inputs to a node after it has been initialised\n", - " wf[\"Mul\"].inputs.y *= 2\n", - "\n", - " return mul.out, divide.divided" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Directly access the workflow being constructed also enables you to set the outputs of the workflow directly" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n", - "def SetOutputsOfWorkflow(a: int, b: float):\n", - " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", - "\n", - " Args:\n", - " a: An integer input\n", - " b: A float input\n", - "\n", - " Returns:\n", - " out1: The first output\n", - " out2: The second output\n", - " \"\"\"\n", - "\n", - " wf = workflow.this()\n", - "\n", - " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", - " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", - " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", - "\n", - " # Alter one of the inputs to a node after it has been initialised\n", - " wf[\"Mul\"].inputs.y *= 2\n", - "\n", - " # Set the outputs of the workflow directly\n", - " wf.outputs.out1 = mul.out\n", - " wf.outputs.out2 = divide.divided" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dataclass form\n", - "\n", - "Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. 
In this case the workflow constructor should be a static method of the dataclasss named `constructor`.\n", - "\n", - "This form also lends itself to defining custom converters and validators on the fields" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.engine.specs import WorkflowSpec, WorkflowOutputs\n", - "\n", - "def a_converter(value):\n", - " if value is None:\n", - " return value\n", - " return float(value)\n", - "\n", - "@workflow.define\n", - "class LibraryWorkflow(WorkflowSpec[\"MyLibraryWorkflow.Outputs\"]):\n", - "\n", - " a: int\n", - " b: float = workflow.arg(\n", - " help_string=\"A float input\",\n", - " converter=a_converter,\n", - " )\n", - "\n", - " @staticmethod\n", - " def constructor(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out\n", - "\n", - " @workflow.outputs\n", - " class Outputs(WorkflowOutputs):\n", - " out: float" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Splitting/combining task inputs\n", - "\n", - "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. This can be achieved by using the `split` and `combine` methods" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define\n", - "def Sum(x: list[float]) -> float:\n", - " return sum(x)\n", - "\n", - "@workflow.define\n", - "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", - " # Multiply over all combinations of the elements of a and b, then combine the results\n", - " # for each a element into a list over each b element\n", - " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", - " # Sume the multiplications across all all b elements for each a element\n", - " sum = workflow.add(Sum(x=mul.out))\n", - " return sum.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The combination step doesn't have to be done on the same step as the split, in which case the splits propagate to downstream nodes" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", - " mul = workflow.add(Mul()).split(x=a, y=b)\n", - " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", - " sum = workflow.add(Sum(x=add.out))\n", - " return sum.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nested and conditional workflows\n", - "\n", - "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def ConditionalWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: image.Png,\n", - " watermark_dims: tuple[int, int] | None = None,\n", - ") -> video.Mp4:\n", - "\n", - " if watermark_dims is not None:\n", - " 
add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video,\n", - " watermark=watermark,\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " )\n", - " )\n", - " handbrake_input = add_watermark.out_video\n", - " else:\n", - " handbrake_input = input_video\n", - "\n", - " output_video = workflow.add(\n", - " shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - " )(in_video=handbrake_input, width=1280, height=720)\n", - " ).out_video\n", - "\n", - " return output_video # test implicit detection of output name" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. However, you can get around\n", - "this limitation by placing the conditional logic within a nested workflow" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define\n", - "def Subtract(x: float, y: float) -> float:\n", - " return x - y\n", - "\n", - "@workflow.define\n", - "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", - " add = workflow.add(Add(x=a, y=1))\n", - " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", - " if depth > 0:\n", - " out_node = workflow.add(\n", - " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", - " )\n", - " else:\n", - " out_node = add\n", - " return out_node.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", - "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Type-checking between nodes\n", - "\n", - "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", - "when values or upstream outputs are assigned to task specification inputs.\n", - "\n", - "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", - "However, if they are assigned a type and a value or output from an upstream node conflicts\n", - "with the type, a `TypeError` will be raised at construction time.\n", - "\n", - "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", - "by `Any` or a super-class of the field being assigned to. 
For example, an input of\n", - "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type,\n", - "because `Png` is a subtype of `File`, where as `fileformats.image.Jpeg` input would fail\n", - "since it is clearly not the intended type.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from fileformats import generic\n", - "\n", - "Mp4Handbrake = shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - ")\n", - "\n", - "\n", - "QuicktimeHandbrake = shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - ")\n", - "\n", - "@workflow.define\n", - "def TypeErrorWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: generic.File,\n", - " watermark_dims: tuple[int, int] = (10, 10),\n", - ") -> video.Mp4:\n", - "\n", - " add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video, # This is OK because in_video is typed Any\n", - " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " ),\n", - " name=\"add_watermark\",\n", - " )\n", - "\n", - " try:\n", - " handbrake = workflow.add(\n", - " QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", - " ) # This will raise a TypeError because the input video is an Mp4\n", - " except TypeError:\n", - " handbrake = workflow.add(\n", - " Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", - " ) # The type of the input video is now correct\n", - "\n", - " return handbrake.output_video" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Workflows\n", + "\n", + "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", + "Workflow definitions are dataclasses, which interchangeable with Python and shell tasks\n", + "definitions and executed in the same way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructor functions\n", + "\n", + "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", + "a \"constructor\" function that generates the workflow. For example, given two task\n", + "definitions, `Add` and `Mul`." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pydra.design import workflow, python\n",
+    "\n",
+    "# Example python task definitions\n",
+    "@python.define\n",
+    "def Add(a, b):\n",
+    "    return a + b\n",
+    "\n",
+    "\n",
+    "@python.define\n",
+    "def Mul(a, b):\n",
+    "    return a * b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " we can create a simple workflow definition using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling the `workflow.add` function."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@workflow.define\n",
+    "def BasicWorkflow(a, b):\n",
+    "    add = workflow.add(Add(a=a, b=b))\n",
+    "    mul = workflow.add(Mul(a=add.out, b=b))\n",
+    "    return mul.out"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the workflow. The fields of the outputs object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of the workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n",
+    "\n",
+    "It is also possible to define new tasks to add to the workflow inline within the constructor and to type the inputs and outputs of the workflow."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pydra.design import shell\n",
+    "from fileformats import image, video\n",
+    "\n",
+    "@workflow.define\n",
+    "def ShellWorkflow(\n",
+    "    input_video: video.Mp4,\n",
+    "    watermark: image.Png,\n",
+    "    watermark_dims: tuple[int, int] = (10, 10),\n",
+    ") -> video.Mp4:\n",
+    "\n",
+    "    add_watermark = workflow.add(\n",
+    "        shell.define(\n",
+    "            \"ffmpeg -i -i \"\n",
+    "            \"-filter_complex \"\n",
+    "        )(\n",
+    "            in_video=input_video,\n",
+    "            watermark=watermark,\n",
+    "            filter=\"overlay={}:{}\".format(*watermark_dims),\n",
+    "        )\n",
+    "    )\n",
+    "    output_video = workflow.add(\n",
+    "        shell.define(\n",
+    "            \"HandBrakeCLI -i -o \"\n",
+    "            \"--width --height \",\n",
+    "        )(in_video=add_watermark.out_video, width=1280, height=720)\n",
+    "    ).out_video\n",
+    "\n",
+    "    return output_video  # test implicit detection of output name"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Accessing the workflow object\n",
+    "\n",
+    "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@python.define(outputs=[\"divided\"])\n",
+    "def Divide(x, y):\n",
+    "    return x / y\n",
+    "\n",
+    "\n",
+    "@workflow.define(outputs=[\"out1\", \"out2\"])\n",
+    "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n",
+    "    \"\"\"A test workflow demonstrating a few alternative ways to set and connect nodes\n",
+    "\n",
+    "    Args:\n",
+    "        a: An integer input\n",
+    "        b: A float input\n",
+    "\n",
+    "    Returns:\n",
+    "        out1: The first output\n",
+    "        out2: The second output\n",
+    "    \"\"\"\n",
+    "\n",
+    "    wf = workflow.this()\n",
+    "\n",
+    "    add = wf.add(Add(x=a, y=b), name=\"addition\")\n",
+    "    mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n",
+    "    divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n",
+    "\n",
+    "    # Alter one of the inputs to a node after it has been initialised\n",
+    "    wf[\"Mul\"].inputs.y *= 2\n",
+    "\n",
+    "    return mul.out, divide.divided"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Directly accessing the workflow being constructed also enables you to set the outputs of the workflow directly"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n",
+    "def SetOutputsOfWorkflow(a: int, b: float):\n",
+    "    \"\"\"A test workflow demonstrating a few alternative ways to set and connect nodes\n",
+    "\n",
+    "    Args:\n",
+    "        a: An integer input\n",
+    "        b: A float input\n",
+    "\n",
+    "    Returns:\n",
+    "        out1: The first output\n",
+    "        out2: The second output\n",
+    "    \"\"\"\n",
+    "\n",
+    "    wf = workflow.this()\n",
+    "\n",
+    "    add = wf.add(Add(x=a, y=b), name=\"addition\")\n",
+    "    mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n",
+    "    divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n",
+    "\n",
+    "    # Alter one of the inputs to a node after it has been initialised\n",
+    "    wf[\"Mul\"].inputs.y *= 2\n",
+    "\n",
+    "    # Set the outputs of the workflow directly\n",
+    "    wf.outputs.out1 = mul.out\n",
+    "    wf.outputs.out2 = divide.divided"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Dataclass form\n",
+    "\n",
+    "Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. 
In this case the workflow constructor should be a static method of the dataclass named `constructor`.\n",
+    "\n",
+    "This form also lends itself to defining custom converters and validators on the fields"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n",
+    "\n",
+    "def a_converter(value):\n",
+    "    if value is None:\n",
+    "        return value\n",
+    "    return float(value)\n",
+    "\n",
+    "@workflow.define\n",
+    "class LibraryWorkflow(WorkflowDef[\"LibraryWorkflow.Outputs\"]):\n",
+    "\n",
+    "    a: int\n",
+    "    b: float = workflow.arg(\n",
+    "        help_string=\"A float input\",\n",
+    "        converter=a_converter,\n",
+    "    )\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def constructor(a, b):\n",
+    "        add = workflow.add(Add(a=a, b=b))\n",
+    "        mul = workflow.add(Mul(a=add.out, b=b))\n",
+    "        return mul.out\n",
+    "\n",
+    "    @workflow.outputs\n",
+    "    class Outputs(WorkflowOutputs):\n",
+    "        out: float"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Splitting/combining task inputs\n",
+    "\n",
+    "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. This can be achieved by using the `split` and `combine` methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@python.define\n",
+    "def Sum(x: list[float]) -> float:\n",
+    "    return sum(x)\n",
+    "\n",
+    "@workflow.define\n",
+    "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n",
+    "    # Multiply over all combinations of the elements of a and b, then combine the results\n",
+    "    # for each a element into a list over each b element\n",
+    "    mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n",
+    "    # Sum the multiplications across all b elements for each a element\n",
+    "    sum = workflow.add(Sum(x=mul.out))\n",
+    "    return sum.out"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The combination doesn't have to be done in the same step as the split, in which case the splits propagate to downstream nodes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@workflow.define\n",
+    "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n",
+    "    mul = workflow.add(Mul()).split(x=a, y=b)\n",
+    "    add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n",
+    "    sum = workflow.add(Sum(x=add.out))\n",
+    "    return sum.out"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Nested and conditional workflows\n",
+    "\n",
+    "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to a workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@workflow.define\n",
+    "def ConditionalWorkflow(\n",
+    "    input_video: video.Mp4,\n",
+    "    watermark: image.Png,\n",
+    "    watermark_dims: tuple[int, int] | None = None,\n",
+    ") -> video.Mp4:\n",
+    "\n",
+    "    if watermark_dims is not None:\n",
+    "        add_watermark 
= workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " handbrake_input = add_watermark.out_video\n", + " else:\n", + " handbrake_input = input_video\n", + "\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=handbrake_input, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. However, you can get around\n", + "this limitation by placing the conditional logic within a nested workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Subtract(x: float, y: float) -> float:\n", + " return x - y\n", + "\n", + "@workflow.define\n", + "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", + " add = workflow.add(Add(x=a, y=1))\n", + " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", + " if depth > 0:\n", + " out_node = workflow.add(\n", + " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", + " )\n", + " else:\n", + " out_node = add\n", + " return out_node.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", + "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type-checking between nodes\n", + "\n", + "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", + "when values or upstream outputs are assigned to task definition inputs.\n", + "\n", + "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", + "However, if they are assigned a type and a value or output from an upstream node conflicts\n", + "with the type, a `TypeError` will be raised at construction time.\n", + "\n", + "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", + "by `Any` or a super-class of the field being assigned to. 
For example, an input of\n",
+    "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type\n",
+    "would be accepted, because `Png` is a subtype of `File`, whereas a `fileformats.image.Jpeg`\n",
+    "input would fail since it is clearly not the intended type.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fileformats import generic\n",
+    "\n",
+    "Mp4Handbrake = shell.define(\n",
+    "    \"HandBrakeCLI -i -o \"\n",
+    "    \"--width --height \",\n",
+    ")\n",
+    "\n",
+    "\n",
+    "QuicktimeHandbrake = shell.define(\n",
+    "    \"HandBrakeCLI -i -o \"\n",
+    "    \"--width --height \",\n",
+    ")\n",
+    "\n",
+    "@workflow.define\n",
+    "def TypeErrorWorkflow(\n",
+    "    input_video: video.Mp4,\n",
+    "    watermark: generic.File,\n",
+    "    watermark_dims: tuple[int, int] = (10, 10),\n",
+    ") -> video.Mp4:\n",
+    "\n",
+    "    add_watermark = workflow.add(\n",
+    "        shell.define(\n",
+    "            \"ffmpeg -i -i \"\n",
+    "            \"-filter_complex \"\n",
+    "        )(\n",
+    "            in_video=input_video,  # This is OK because in_video is typed Any\n",
+    "            watermark=watermark,  # Type is OK because generic.File is superclass of image.Png\n",
+    "            filter=\"overlay={}:{}\".format(*watermark_dims),\n",
+    "        ),\n",
+    "        name=\"add_watermark\",\n",
+    "    )\n",
+    "\n",
+    "    try:\n",
+    "        handbrake = workflow.add(\n",
+    "            QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n",
+    "        )  # This will raise a TypeError because the input video is an Mp4\n",
+    "    except TypeError:\n",
+    "        handbrake = workflow.add(\n",
+    "            Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n",
+    "        )  # The type of the input video is now correct\n",
+    "\n",
+    "    return handbrake.output_video"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For more detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/pydra/design/base.py b/pydra/design/base.py index dec1be2fc0..8e9b42435a 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -26,7 +26,7 @@ if ty.TYPE_CHECKING: - from pydra.engine.specs import TaskSpec, TaskOutputs + from pydra.engine.specs import TaskDef, TaskOutputs from pydra.engine.core import Task __all__ = [ @@ -75,7 +75,7 @@ class Requirement: name: str allowed_values: list[str] = attrs.field(factory=list, converter=list) - def satisfied(self, inputs: "TaskSpec") -> bool: + def satisfied(self, inputs: "TaskDef") -> bool: """Check if the requirement is satisfied by the inputs""" value = getattr(inputs, self.name) if value is attrs.NOTHING: @@ -122,7 +122,7 @@ class RequirementSet: converter=requirements_converter, ) - def satisfied(self, inputs: "TaskSpec") -> bool: + def satisfied(self, inputs: "TaskDef") -> bool: """Check if all the requirements are satisfied by the inputs""" return all(req.satisfied(inputs) for req in self.requirements) @@ -155,7 +155,7 @@ def requires_converter( @attrs.define(kw_only=True) class Field: - """Base class for input and output fields to task specifications + """Base class for input and output fields to task definitions Parameters ---------- @@ -193,14 +193,14 @@ class Field: converter: ty.Callable | None = None validator: ty.Callable | None = None - def requirements_satisfied(self, inputs: "TaskSpec") -> bool: + def requirements_satisfied(self, inputs: "TaskDef") -> bool: """Check if all the requirements are satisfied by the inputs""" return any(req.satisfied(inputs) for req in self.requires) @attrs.define(kw_only=True) class Arg(Field): - """Base class for input fields of task specifications + """Base class for input fields of task definitions Parameters ---------- @@ -242,7 +242,7 @@ class Arg(Field): @attrs.define(kw_only=True) class Out(Field): - """Base class for output fields of task specifications + """Base class for output fields of task definitions Parameters ---------- @@ -350,7 +350,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_spec( - spec_type: type["TaskSpec"], + spec_type: type["TaskDef"], out_type: type["TaskOutputs"], task_type: type["Task"], inputs: dict[str, Arg], @@ -360,7 +360,7 @@ def make_task_spec( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), ): - """Create a task specification class and its outputs specification class from the + """Create a task definition class and its outputs definition class from the input and output fields provided to the decorator/function. 
Modifies the class so that its attributes are converted from pydra fields to attrs fields @@ -380,16 +380,16 @@ def make_task_spec( name : str, optional The name of the class, by default bases : ty.Sequence[type], optional - The base classes for the task specification class, by default () + The base classes for the task definition class, by default () outputs_bases : ty.Sequence[type], optional - The base classes for the outputs specification class, by default () + The base classes for the outputs definition class, by default () Returns ------- klass : type The class created using the attrs package """ - from pydra.engine.specs import TaskSpec + from pydra.engine.specs import TaskDef spec_type._check_arg_refs(inputs, outputs) @@ -403,17 +403,17 @@ def make_task_spec( if klass is None or not issubclass(klass, spec_type): if name is None: raise ValueError("name must be provided if klass is not") - if klass is not None and issubclass(klass, TaskSpec): + if klass is not None and issubclass(klass, TaskDef): raise ValueError(f"Cannot change type of spec {klass} to {spec_type}") bases = tuple(bases) - # Ensure that TaskSpec is a base class + # Ensure that TaskDef is a base class if not any(issubclass(b, spec_type) for b in bases): bases = bases + (spec_type,) # If building from a decorated class (as opposed to dynamically from a function # or shell-template), add any base classes not already in the bases tuple if klass is not None: bases += tuple(c for c in klass.__mro__ if c not in bases + (object,)) - # Create a new class with the TaskSpec as a base class + # Create a new class with the TaskDef as a base class klass = types.new_class( name=name, bases=bases, @@ -472,7 +472,7 @@ def make_outputs_spec( bases: ty.Sequence[type], spec_name: str, ) -> type["TaskOutputs"]: - """Create an outputs specification class and its outputs specification class from the + """Create an outputs definition class and its outputs definition class from the output fields provided to the decorator/function. Creates a new class with attrs fields and then calls `attrs.define` to create an @@ -483,9 +483,9 @@ def make_outputs_spec( outputs : dict[str, Out] The output fields of the task bases : ty.Sequence[type], optional - The base classes for the outputs specification class, by default () + The base classes for the outputs definition class, by default () spec_name : str - The name of the task specification class the outputs are for + The name of the task definition class the outputs are for Returns ------- diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 6877bf4822..54050d60c0 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -5,14 +5,14 @@ from pathlib import Path from functools import reduce from fileformats.generic import File -from pydra.engine.specs import ShellSpec +from pydra.engine.specs import ShellDef from pydra.engine.task import BoshTask from .base import make_task_spec from . 
import shell class arg(shell.arg): - """Class for input fields of Boutiques task specifications + """Class for input fields of Boutiques task definitions Parameters ---------- @@ -46,7 +46,7 @@ class arg(shell.arg): class out(shell.out): - """Class for output fields of Boutiques task specifications + """Class for output fields of Boutiques task definitions Parameters ---------- @@ -114,7 +114,7 @@ def define( bosh_spec, input_keys, names_subset=output_spec_names ) return make_task_spec( - spec_type=ShellSpec, + spec_type=ShellDef, task_type=BoshTask, out_type=out, arg_type=arg, diff --git a/pydra/design/python.py b/pydra/design/python.py index 625b882367..febc74b98a 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -14,7 +14,7 @@ ) if ty.TYPE_CHECKING: - from pydra.engine.specs import PythonSpec + from pydra.engine.specs import PythonDef __all__ = ["arg", "out", "define"] @@ -101,7 +101,7 @@ def define( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, -) -> "PythonSpec": +) -> "PythonDef": """ Create an interface for a function or a class. @@ -118,13 +118,13 @@ def define( Returns ------- - PythonSpec - The task specification class for the Python function + PythonDef + The task definition class for the Python function """ from pydra.engine.task import PythonTask - from pydra.engine.specs import PythonSpec, PythonOutputs + from pydra.engine.specs import PythonDef, PythonOutputs - def make(wrapped: ty.Callable | type) -> PythonSpec: + def make(wrapped: ty.Callable | type) -> PythonDef: if inspect.isclass(wrapped): klass = wrapped function = klass.function @@ -160,7 +160,7 @@ def make(wrapped: ty.Callable | type) -> PythonSpec: ) interface = make_task_spec( - PythonSpec, + PythonDef, PythonOutputs, PythonTask, parsed_inputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 419279383f..5e38f9ffa4 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -25,7 +25,7 @@ from pydra.utils.typing import is_fileset_or_union, MultiInputObj if ty.TYPE_CHECKING: - from pydra.engine.specs import ShellSpec + from pydra.engine.specs import ShellDef __all__ = ["arg", "out", "outarg", "define"] @@ -190,7 +190,7 @@ class outarg(Out, arg): field will be sent). path_template: str, optional The template used to specify where the output file will be written to can use - other fields, e.g. {file1}. Used in order to create an output specification. + other fields, e.g. {file1}. Used in order to create an output definition. """ path_template: str | None = attrs.field(default=None) @@ -235,9 +235,9 @@ def define( outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, name: str | None = None, -) -> "ShellSpec": - """Create a task specification for a shell command. Can be used either as a decorator on - the "canonical" dataclass-form of a task specification or as a function that takes a +) -> "ShellDef": + """Create a task definition for a shell command. 
Can be used either as a decorator on + the "canonical" dataclass-form of a task definition or as a function that takes a "shell-command template string" of the form ``` @@ -284,15 +284,15 @@ def define( Returns ------- - ShellSpec + ShellDef The interface for the shell command """ from pydra.engine.task import ShellTask - from pydra.engine.specs import ShellSpec, ShellOutputs + from pydra.engine.specs import ShellDef, ShellOutputs def make( wrapped: ty.Callable | type | None = None, - ) -> ShellSpec: + ) -> ShellDef: if inspect.isclass(wrapped): klass = wrapped @@ -374,7 +374,7 @@ def make( inpt.position = position_stack.pop(0) interface = make_task_spec( - ShellSpec, + ShellDef, ShellOutputs, ShellTask, parsed_inputs, @@ -684,5 +684,5 @@ class _InputPassThrough: name: str - def __call__(self, inputs: ShellSpec) -> ty.Any: + def __call__(self, inputs: ShellDef) -> ty.Any: return getattr(inputs, self.name) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 302c88d14f..82c24c8fd4 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -4,7 +4,7 @@ import attrs import pytest from pydra.engine.helpers import list_fields -from pydra.engine.specs import PythonSpec +from pydra.engine.specs import PythonDef from pydra.design import python from pydra.engine.task import PythonTask @@ -17,21 +17,21 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleSpec = python.define(func) + SampleDef = python.define(func) - assert issubclass(SampleSpec, PythonSpec) - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="out", type=float)] - spec = SampleSpec(a=1) + spec = SampleDef(a=1) result = spec() assert result.output.out == 2.0 with pytest.raises(TypeError): - SampleSpec(a=1.5) + SampleDef(a=1.5) def test_interface_wrap_function_with_default(): @@ -39,19 +39,19 @@ def func(a: int, k: float = 2.0) -> float: """Sample function with inputs and outputs""" return a * k - SampleSpec = python.define(func) + SampleDef = python.define(func) - assert issubclass(SampleSpec, PythonSpec) - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, default=func), python.arg(name="k", type=float, default=2.0), ] assert outputs == [python.out(name="out", type=float)] - assert SampleSpec(a=1)().output.out == 2.0 - assert SampleSpec(a=10, k=3.0)().output.out == 30.0 + assert SampleDef(a=1)().output.out == 2.0 + assert SampleDef(a=10, k=3.0)().output.out == 30.0 def test_interface_wrap_function_overrides(): @@ -59,15 +59,15 @@ def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 - SampleSpec = python.define( + SampleDef = python.define( func, inputs={"a": python.arg(help_string="The argument to be doubled")}, outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, ) - 
assert issubclass(SampleSpec, PythonSpec) - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="The argument to be doubled"), python.arg(name="function", type=ty.Callable, default=func), @@ -75,7 +75,7 @@ def func(a: int) -> float: assert outputs == [ python.out(name="b", type=Decimal, help_string="the doubled output"), ] - outputs = SampleSpec.Outputs(b=Decimal(2.0)) + outputs = SampleDef.Outputs(b=Decimal(2.0)) assert isinstance(outputs.b, Decimal) @@ -84,84 +84,84 @@ def func(a: int) -> int: """Sample function with inputs and outputs""" return a * 2 - SampleSpec = python.define( + SampleDef = python.define( func, inputs={"a": float}, outputs={"b": float}, ) - assert issubclass(SampleSpec, PythonSpec) - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=float), python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="b", type=float)] - intf = SampleSpec(a=1) + intf = SampleDef(a=1) assert isinstance(intf.a, float) - outputs = SampleSpec.Outputs(b=2.0) + outputs = SampleDef.Outputs(b=2.0) assert isinstance(outputs.b, float) def test_decorated_function_interface(): @python.define(outputs=["c", "d"]) - def SampleSpec(a: int, b: float) -> tuple[float, float]: + def SampleDef(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" return a + b, a * b - assert issubclass(SampleSpec, PythonSpec) - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="b", type=float), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float), python.out(name="d", type=float), ] - assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" - SampleSpec.Outputs(c=1.0, d=2.0) + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + SampleDef.Outputs(c=1.0, d=2.0) def test_interface_with_function_implicit_outputs_from_return_stmt(): @python.define - def SampleSpec(a: int, b: float) -> tuple[float, float]: + def SampleDef(a: int, b: float) -> tuple[float, float]: """Sample function for testing""" c = a + b d = a * b return c, d - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), python.arg(name="b", type=float), python.arg( 
name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float), python.out(name="d", type=float), ] - assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" - SampleSpec.Outputs(c=1.0, d=2.0) + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" + SampleDef.Outputs(c=1.0, d=2.0) def test_interface_with_function_docstr(): @python.define(outputs=["c", "d"]) - def SampleSpec(a: int, b: float) -> tuple[float, float]: + def SampleDef(a: int, b: float) -> tuple[float, float]: """Sample function for testing :param a: First input to be inputted @@ -171,28 +171,28 @@ def SampleSpec(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="product of a and b"), ] - assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" def test_interface_with_function_google_docstr(): @python.define(outputs=["c", "d"]) - def SampleSpec(a: int, b: float) -> tuple[float, float]: + def SampleDef(a: int, b: float) -> tuple[float, float]: """Sample function for testing Args: @@ -206,30 +206,30 @@ def SampleSpec(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" def test_interface_with_function_numpy_docstr(): @python.define( outputs=["c", "d"] ) # Could potentiall read output names from doc-string instead - def SampleSpec(a: int, b: float) -> tuple[float, float]: + def SampleDef(a: int, b: float) -> tuple[float, float]: """Sample function for testing Parameters @@ -249,28 +249,28 @@ def SampleSpec(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = 
sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert attrs.fields(SampleSpec).function.default.__name__ == "SampleSpec" + assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" def test_interface_with_class(): @python.define - class SampleSpec: + class SampleDef: """Sample class for testing Args: @@ -296,32 +296,32 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleSpec, PythonSpec) - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert issubclass(SampleDef, PythonDef) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, default=2.0, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleSpec.function.__name__ == "function" - SampleSpec(a=1) - SampleSpec(a=1, b=2.0) - SampleSpec.Outputs(c=1.0, d=2.0) + assert SampleDef.function.__name__ == "function" + SampleDef(a=1) + SampleDef(a=1, b=2.0) + SampleDef.Outputs(c=1.0, d=2.0) def test_interface_with_inheritance(): @python.define - class SampleSpec(PythonSpec["SampleSpec.Outputs"]): + class SampleDef(PythonDef["SampleDef.Outputs"]): """Sample class for testing Args: @@ -347,12 +347,12 @@ class Outputs: def function(a, b): return a + b, a * b - assert issubclass(SampleSpec, PythonSpec) + assert issubclass(SampleDef, PythonDef) def test_interface_with_class_no_auto_attribs(): @python.define(auto_attribs=False) - class SampleSpec: + class SampleDef: a: int = python.arg(help_string="First input to be inputted") b: float = python.arg(help_string="Second input") @@ -368,36 +368,36 @@ class Outputs: def function(a, b): return a + b, a * b - assert SampleSpec.Task is PythonTask - inputs = sorted(list_fields(SampleSpec), key=sort_key) - outputs = sorted(list_fields(SampleSpec.Outputs), key=sort_key) + assert SampleDef.Task is PythonTask + inputs = sorted(list_fields(SampleDef), key=sort_key) + outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help_string="First input to be inputted"), python.arg(name="b", type=float, help_string="Second input"), python.arg( name="function", type=ty.Callable, - default=attrs.fields(SampleSpec).function.default, + default=attrs.fields(SampleDef).function.default, ), ] assert outputs == [ python.out(name="c", type=float, help_string="Sum of a and b"), 
python.out(name="d", type=float, help_string="Product of a and b"), ] - assert SampleSpec.function.__name__ == "function" - SampleSpec(a=1, b=2.0) - SampleSpec.Outputs(c=1.0, d=2.0) + assert SampleDef.function.__name__ == "function" + SampleDef(a=1, b=2.0) + SampleDef.Outputs(c=1.0, d=2.0) with pytest.raises(TypeError): - SampleSpec(a=1, b=2.0, x=3) + SampleDef(a=1, b=2.0, x=3) with pytest.raises(TypeError): - SampleSpec.Outputs(c=1.0, d=2.0, y="hello") + SampleDef.Outputs(c=1.0, d=2.0, y="hello") def test_interface_invalid_wrapped1(): with pytest.raises(ValueError): @python.define(inputs={"a": python.arg()}) - class SampleSpec(PythonSpec["SampleSpec.Outputs"]): + class SampleDef(PythonDef["SampleDef.Outputs"]): a: int class Outputs: @@ -412,7 +412,7 @@ def test_interface_invalid_wrapped2(): with pytest.raises(ValueError): @python.define(outputs={"b": python.out()}) - class SampleSpec(PythonSpec["SampleSpec.Outputs"]): + class SampleDef(PythonDef["SampleDef.Outputs"]): a: int class Outputs: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 771bcba995..1f35b4e826 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -7,7 +7,7 @@ from pydra.design import shell from pydra.engine.helpers import list_fields from pydra.engine.specs import ( - ShellSpec, + ShellDef, ShellOutputs, RETURN_CODE_HELP, STDOUT_HELP, @@ -22,7 +22,7 @@ def test_interface_template(): Cp = shell.define("cp ") - assert issubclass(Cp, ShellSpec) + assert issubclass(Cp, ShellDef) output = shell.outarg( name="out_path", path_template="out_path", @@ -69,7 +69,7 @@ def test_interface_template_w_types_and_path_template_ext(): TrimPng = shell.define("trim-png ") - assert issubclass(TrimPng, ShellSpec) + assert issubclass(TrimPng, ShellDef) output = shell.outarg( name="out_image", path_template="out_image.png", @@ -115,7 +115,7 @@ def test_interface_template_w_modify(): TrimPng = shell.define("trim-png ") - assert issubclass(TrimPng, ShellSpec) + assert issubclass(TrimPng, ShellDef) assert sorted_fields(TrimPng) == [ shell.arg( name="executable", @@ -167,7 +167,7 @@ def test_interface_template_more_complex(): ), ) - assert issubclass(Cp, ShellSpec) + assert issubclass(Cp, ShellDef) output = shell.outarg( name="out_dir", type=Directory, @@ -254,7 +254,7 @@ def test_interface_template_with_overrides_and_optionals(): }, ) - assert issubclass(Cp, ShellSpec) + assert issubclass(Cp, ShellDef) outargs = [ shell.outarg( name="out_dir", @@ -340,7 +340,7 @@ def test_interface_template_with_defaults(): ), ) - assert issubclass(Cp, ShellSpec) + assert issubclass(Cp, ShellDef) output = shell.outarg( name="out_dir", type=Directory, @@ -408,7 +408,7 @@ def test_interface_template_with_type_overrides(): inputs={"text_arg": str, "int_arg": int | None}, ) - assert issubclass(Cp, ShellSpec) + assert issubclass(Cp, ShellDef) output = shell.outarg( name="out_dir", type=Directory, @@ -468,7 +468,7 @@ def Ls(request): if request.param == "static": @shell.define - class Ls(ShellSpec["Ls.Outputs"]): + class Ls(ShellDef["Ls.Outputs"]): executable = "ls" directory: Directory = shell.arg( diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index d6b11ab565..10d091db3f 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -7,7 +7,7 @@ import typing as ty from pydra.design import shell, python, workflow from pydra.engine.helpers import list_fields -from pydra.engine.specs import WorkflowSpec, WorkflowOutputs +from 
pydra.engine.specs import WorkflowDef, WorkflowOutputs from fileformats import video, image # NB: We use PascalCase for interfaces and workflow functions as it is translated into a class @@ -50,7 +50,7 @@ def MyTestWorkflow(a, b): constructor = MyTestWorkflow().constructor assert constructor.__name__ == "MyTestWorkflow" - # The constructor function is included as a part of the specification so it is + # The constructor function is included as a part of the definition so it is # included in the hash by default and can be overridden if needed. Not 100% sure # if this is a good idea or not assert list_fields(MyTestWorkflow) == [ @@ -133,7 +133,7 @@ def test_workflow_canonical(): # NB: We use PascalCase (i.e. class names) as it is translated into a class @workflow.define - class MyTestWorkflow(WorkflowSpec["MyTestWorkflow.Outputs"]): + class MyTestWorkflow(WorkflowDef["MyTestWorkflow.Outputs"]): a: int b: float = workflow.arg( @@ -154,7 +154,7 @@ class Outputs(WorkflowOutputs): constructor = MyTestWorkflow().constructor assert constructor.__name__ == "constructor" - # The constructor function is included as a part of the specification so it is + # The constructor function is included as a part of the definition so it is # included in the hash by default and can be overridden if needed. Not 100% sure # if this is a good idea or not assert sorted(list_fields(MyTestWorkflow), key=attrgetter("name")) == [ diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 7562d78df0..32c84d5a63 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -15,7 +15,7 @@ if ty.TYPE_CHECKING: from pydra.engine.workflow.base import Workflow - from pydra.engine.specs import TaskSpec, TaskOutputs, WorkflowSpec + from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowDef __all__ = ["define", "add", "this", "arg", "out"] @@ -107,10 +107,10 @@ def define( outputs_bases: ty.Sequence[type] = (), lazy: list[str] | None = None, auto_attribs: bool = True, -) -> "WorkflowSpec": +) -> "WorkflowDef": """ Create an interface for a function or a class. Can be used either as a decorator on - a constructor function or the "canonical" dataclass-form of a task specification. + a constructor function or the "canonical" dataclass-form of a task definition. Parameters ---------- @@ -125,16 +125,16 @@ def define( Returns ------- - TaskSpec + TaskDef The interface for the function or class. 
""" from pydra.engine.core import WorkflowTask - from pydra.engine.specs import TaskSpec, WorkflowSpec, WorkflowOutputs + from pydra.engine.specs import TaskDef, WorkflowDef, WorkflowOutputs if lazy is None: lazy = [] - def make(wrapped: ty.Callable | type) -> TaskSpec: + def make(wrapped: ty.Callable | type) -> TaskDef: if inspect.isclass(wrapped): klass = wrapped constructor = klass.constructor @@ -172,7 +172,7 @@ def make(wrapped: ty.Callable | type) -> TaskSpec: parsed_inputs[inpt_name].lazy = True interface = make_task_spec( - WorkflowSpec, + WorkflowDef, WorkflowOutputs, WorkflowTask, parsed_inputs, @@ -208,20 +208,20 @@ def this() -> "Workflow": OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") -def add(task_spec: "TaskSpec[OutputsType]", name: str = None) -> OutputsType: +def add(task_spec: "TaskDef[OutputsType]", name: str = None) -> OutputsType: """Add a node to the workflow currently being constructed Parameters ---------- - task_spec : TaskSpec - The specification of the task to add to the workflow as a node + task_spec : TaskDef + The definition of the task to add to the workflow as a node name : str, optional - The name of the node, by default it will be the name of the task specification + The name of the node, by default it will be the name of the task definition class Returns ------- Outputs - The outputs specification of the node + The outputs definition of the node """ return this().add(task_spec, name=name) diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 6f8d2dd8c4..6f39fda1d2 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -26,7 +26,7 @@ def __init__(self, audit_flags, messengers, messenger_args, develop=None): Base configuration of auditing. messengers : :obj:`pydra.util.messenger.Messenger` or list of :class:`pydra.util.messenger.Messenger`, optional - Specify types of messenger used by Audit to send a message. + Defify types of messenger used by Audit to send a message. Could be `PrintMessenger`, `FileMessenger`, or `RemoteRESTMessenger`. messenger_args : :obj:`dict`, optional Optional arguments for the `Messenger.send` method. diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 780a2d9976..904bbf9b9e 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -19,10 +19,10 @@ from . 
import helpers_state as hlpst from .specs import (     File, -    RuntimeSpec, +    RuntimeDef,     Result,     TaskHook, -    TaskSpec, +    TaskDef, ) from .helpers import (     create_checksum, @@ -71,14 +71,14 @@ class Task:      _can_resume = False  # Does the task allow resuming from previous state     _redirect_x = False  # Whether an X session should be created/directed  -    _runtime_requirements = RuntimeSpec() +    _runtime_requirements = RuntimeDef()     _runtime_hints = None      _cache_dir = None  # Working directory in which to operate     _references = None  # List of references for a task      name: str -    spec: TaskSpec +    spec: TaskDef      _inputs: dict[str, ty.Any] | None = None  @@ -336,7 +336,7 @@ def __call__(         from .submitter import Submitter          if submitter and plugin: -            raise Exception("Specify submitter OR plugin, not both") +            raise Exception("Specify submitter OR plugin, not both")         elif submitter:             pass     # if there is plugin provided or the task is a Workflow or has a state, @@ -1033,7 +1033,7 @@ async def _run_task(self, submitter, rerun=False, environment=None):     #         else:     #             type_ = lf.type     #         fields.append((wf_out_nm, type_, {"help_string": help_string})) -    #     self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,)) +    #     self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseDef,))     #     logger.info("Added %s to %s", self.output_spec, self)      def _collect_outputs(self): diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 8b2cc6428a..edf292ed9a 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -18,7 +18,7 @@ from fileformats.core import FileSet  if ty.TYPE_CHECKING: -    from .specs import TaskSpec +    from .specs import TaskDef     from pydra.design.base import Field  @@ -35,8 +35,8 @@ def attrs_values(obj, **kwargs) -> dict[str, ty.Any]:     return attrs.asdict(obj, recurse=False, **kwargs)  -def list_fields(spec: "type[TaskSpec] | TaskSpec") -> list["Field"]: -    """List the fields of a task specification""" +def list_fields(spec: "type[TaskDef] | TaskDef") -> list["Field"]: +    """List the fields of a task definition"""     if not inspect.isclass(spec):         spec = type(spec)     if not attrs.has(spec): @@ -48,7 +48,7 @@ def list_fields(spec: "type[TaskSpec] | TaskSpec") -> list["Field"]:     ]  -def fields_dict(spec: "type[TaskSpec] | TaskSpec") -> dict[str, "Field"]: +def fields_dict(spec: "type[TaskDef] | TaskDef") -> dict[str, "Field"]:     """Returns the fields of a spec in a dictionary"""     return {f.name: f for f in list_fields(spec)}  diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 8dcb196a35..a30861ca34 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1,4 +1,4 @@ -"""Task I/O specifications.""" +"""Task I/O definitions."""  from pathlib import Path import re @@ -41,7 +41,7 @@ def is_set(value: ty.Any) -> bool:   class TaskOutputs: -    """Base class for all output specifications""" +    """Base class for all output definitions"""      RESERVED_FIELD_NAMES = ("inputs", "split", "combine")  @@ -145,7 +145,7 @@ def _get_node(self):         )      def __iter__(self) -> ty.Generator[str, None, None]: -        """Iterate through all the names in the specification""" +        """Iterate through all the names in the definition"""         return (f.name for f in list_fields(self))      def __getitem__(self, name: str) -> ty.Any: @@ -170,8 +170,8 @@ def __getitem__(self, name: str) -> ty.Any: OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs)  -class TaskSpec(ty.Generic[OutputsType]): -    """Base class for all task specifications""" +class TaskDef(ty.Generic[OutputsType]): +    """Base class for all task definitions"""      Task: 
"ty.Type[core.Task]" @@ -190,7 +190,7 @@ def __call__( rerun=False, **kwargs, ) -> "Result[OutputsType]": - """Create a task from this specification and execute it to produce a result. + """Create a task from this definition and execute it to produce a result. Parameters ---------- @@ -236,7 +236,7 @@ def __call__( return task(**kwargs) def __iter__(self) -> ty.Generator[str, None, None]: - """Iterate through all the names in the specification""" + """Iterate through all the names in the definition""" return (f.name for f in list_fields(self)) def __getitem__(self, name: str) -> ty.Any: @@ -429,7 +429,7 @@ def get_output_field(self, field_name): @attrs.define(kw_only=True) -class RuntimeSpec: +class RuntimeDef: """ Specification for a task. @@ -460,7 +460,7 @@ class PythonOutputs(TaskOutputs): PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) -class PythonSpec(TaskSpec[PythonOutputsType]): +class PythonDef(TaskDef[PythonOutputsType]): pass @@ -471,7 +471,7 @@ class WorkflowOutputs(TaskOutputs): WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) -class WorkflowSpec(TaskSpec[WorkflowOutputsType]): +class WorkflowDef(TaskDef[WorkflowOutputsType]): pass @@ -481,7 +481,7 @@ class WorkflowSpec(TaskSpec[WorkflowOutputsType]): class ShellOutputs(TaskOutputs): - """Output specification of a generic shell process.""" + """Output definition of a generic shell process.""" return_code: int = shell.out(help_string=RETURN_CODE_HELP) stdout: str = shell.out(help_string=STDOUT_HELP) @@ -497,8 +497,8 @@ def from_task( Parameters ---------- - inputs : ShellSpec - The input specification of the shell process. + inputs : ShellDef + The input definition of the shell process. output_dir : Path The directory where the process was run. stdout : str @@ -556,7 +556,7 @@ def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: return default @classmethod - def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool: + def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: """checking if all fields from the requires and template are set in the input if requires is a list of list, checking if at least one list has all elements set """ @@ -587,7 +587,7 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellSpec") -> bool ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) -class ShellSpec(TaskSpec[ShellOutputsType]): +class ShellDef(TaskDef[ShellOutputsType]): RESERVED_FIELD_NAMES = ("cmdline",) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index fb26767e5e..58a1a68b0a 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -7,7 +7,7 @@ from . 
import helpers_state as hlpst from .helpers import ensure_list, attrs_values -# from .specs import BaseSpec +# from .specs import BaseDef # TODO: move to State op = {".": zip, "*": itertools.product} diff --git a/pydra/engine/task.py b/pydra/engine/task.py index fe9816992d..b3377a846b 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -50,8 +50,8 @@ from .core import Task from pydra.utils.messenger import AuditFlag from .specs import ( - PythonSpec, - ShellSpec, + PythonDef, + ShellDef, is_set, attrs_fields, ) @@ -70,7 +70,7 @@ class PythonTask(Task): """Wrap a Python callable as a task element.""" - spec: PythonSpec + spec: PythonDef def _run_task(self, environment=None): inputs = attrs_values(self.spec) @@ -97,11 +97,11 @@ def _run_task(self, environment=None): class ShellTask(Task): """Wrap a shell command as a task element.""" - spec: ShellSpec + spec: ShellDef def __init__( self, - spec: ShellSpec, + spec: ShellDef, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cont_dim=None, @@ -133,7 +133,7 @@ def __init__( TODO name : :obj:`str` Name of this task. - output_spec : :obj:`pydra.engine.specs.BaseSpec` + output_spec : :obj:`pydra.engine.specs.BaseDef` Specification of inputs. strip : :obj:`bool` TODO diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 838e054bdd..c0973379c6 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -5,7 +5,7 @@ from unittest.mock import Mock import pytest from fileformats.generic import File -from ..specs import ShellSpec +from ..specs import ShellDef from ..task import ShellTask from ..helpers_file import ( ensure_list, @@ -385,7 +385,7 @@ def test_output_template(tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) class MyCommand(ShellTask): diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index 17384b7644..a93492a0ff 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -1,7 +1,7 @@ import typing as ty import pytest from pathlib import Path -from pydra.engine.specs import ShellOutputs, ShellSpec +from pydra.engine.specs import ShellOutputs, ShellDef from fileformats.generic import File from pydra.design import shell @@ -21,7 +21,7 @@ def find_txt(output_dir: Path) -> File: @shell.define -class Interf_3(ShellSpec["Interf_3.Outputs"]): +class Interf_3(ShellDef["Interf_3.Outputs"]): """class with customized input and executables""" executable = ["testing", "command"] @@ -34,7 +34,7 @@ class Outputs(ShellOutputs): @shell.define -class TouchInterf(ShellSpec["TouchInterf.Outputs"]): +class TouchInterf(ShellDef["TouchInterf.Outputs"]): """class with customized input and executables""" new_file: str = shell.outarg( @@ -58,7 +58,7 @@ def test_interface_specs_2(): my_input_spec = SpecInfo( name="Input", fields=[("my_inp", ty.Any, {"help_string": "my inp"})], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( name="Output", fields=[("my_out", File, "*.txt")], bases=(ShellOutputs,) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 9b70dbc6b9..d03dbd466f 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -11,7 +11,7 @@ from ..submitter import Submitter from ..specs import ( ShellOutputs, - ShellSpec, + ShellDef, ) from fileformats.generic import ( File, @@ -296,7 +296,7 @@ def test_shell_cmd_inputspec_1(plugin, 
results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -344,7 +344,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -385,7 +385,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -418,7 +418,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -456,7 +456,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -490,7 +490,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -523,7 +523,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -553,7 +553,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -579,7 +579,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): fields=[ ("text", str, "Hello", {"position": 1, "help_string": "text", "argstr": ""}) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -610,7 +610,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -645,7 +645,7 @@ def test_shell_cmd_inputspec_4c_exception(plugin): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -675,7 +675,7 @@ def test_shell_cmd_inputspec_4d_exception(plugin): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -718,7 +718,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -767,7 +767,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -814,7 +814,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -860,7 +860,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -902,7 +902,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -943,7 +943,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -988,7 +988,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1035,7 +1035,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1074,7 +1074,7 @@ def 
test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1132,7 +1132,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1189,7 +1189,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1240,7 +1240,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1293,7 +1293,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1341,7 +1341,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1392,7 +1392,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1444,7 +1444,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1495,7 +1495,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1543,7 +1543,7 @@ def test_shell_cmd_inputspec_10_err(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) with pytest.raises(FileNotFoundError): @@ -1579,7 +1579,7 @@ def test_shell_cmd_inputspec_11(tmp_path): ) ] - input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellSpec,)) + input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellDef,)) output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutputs,)) task = ShellTask( @@ -1655,7 +1655,7 @@ def template_function(inputs): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1699,7 +1699,7 @@ def test_shell_cmd_inputspec_with_iterable(): }, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) task = ShellTask(name="test", input_spec=input_spec, executable="test") @@ -1749,7 +1749,7 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1811,7 +1811,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1889,7 +1889,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1930,7 +1930,7 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -1965,7 +1965,7 @@ def test_shell_cmd_inputspec_typeval_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) with pytest.raises(TypeError): @@ -1981,7 +1981,7 @@ def test_shell_cmd_inputspec_typeval_2(): my_input_spec = SpecInfo( name="Input", fields=[("text", int, {"position": 1, "argstr": "", "help_string": "text"})], - bases=(ShellSpec,), + bases=(ShellDef,), ) with pytest.raises(TypeError): @@ -2003,7 +2003,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, ) ], - 
bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -2042,7 +2042,7 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -2088,7 +2088,7 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -2148,7 +2148,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -2200,7 +2200,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2247,7 +2247,7 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2295,7 +2295,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_input_spec2 = SpecInfo( @@ -2325,7 +2325,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2392,7 +2392,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_input_spec2 = SpecInfo( @@ -2422,7 +2422,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2487,7 +2487,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_input_spec2 = SpecInfo( @@ -2517,7 +2517,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2585,7 +2585,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_input_spec2 = SpecInfo( @@ -2615,7 +2615,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf.add( @@ -2884,7 +2884,7 @@ def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): """ @attr.s(kw_only=True) - class MyOutputSpec(ShellOutputs): + class MyOutputDef(ShellOutputs): @staticmethod def gather_output(executable, output_dir): files = executable[1:] @@ -2895,7 +2895,7 @@ def gather_output(executable, output_dir): shelly = ShellTask( name="shelly", executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"], - output_spec=SpecInfo(name="Output", bases=(MyOutputSpec,)), + output_spec=SpecInfo(name="Output", bases=(MyOutputDef,)), cache_dir=tmp_path, ) @@ -3015,7 +3015,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3091,7 +3091,7 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3288,7 +3288,7 @@ def get_lowest_directory(directory_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3421,7 +3421,7 @@ def test_shell_cmd_inputspec_outputspec_1(): {"help_string": "2nd creadted file", "argstr": "", "position": 2}, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3475,7 +3475,7 @@ def test_shell_cmd_inputspec_outputspec_1a(): {"help_string": "2nd creadted file", "argstr": "", "position": 2}, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3528,7 +3528,7 @@ def test_shell_cmd_inputspec_outputspec_2(): {"help_string": "2nd creadted 
file", "argstr": "", "position": 2}, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3595,7 +3595,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): {"help_string": "2nd creadted file", "argstr": "", "position": 2}, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3671,7 +3671,7 @@ def test_shell_cmd_inputspec_outputspec_3(): ), ("additional_inp", int, {"help_string": "additional inp"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3732,7 +3732,7 @@ def test_shell_cmd_inputspec_outputspec_3a(): ), ("additional_inp", str, {"help_string": "additional inp"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3801,7 +3801,7 @@ def test_shell_cmd_inputspec_outputspec_4(): ), ("additional_inp", int, {"help_string": "additional inp"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3856,7 +3856,7 @@ def test_shell_cmd_inputspec_outputspec_4a(): ), ("additional_inp", int, {"help_string": "additional inp"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3907,7 +3907,7 @@ def test_shell_cmd_inputspec_outputspec_5(): ("additional_inp_A", int, {"help_string": "additional inp A"}), ("additional_inp_B", str, {"help_string": "additional inp B"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -3961,7 +3961,7 @@ def test_shell_cmd_inputspec_outputspec_5a(): ("additional_inp_A", str, {"help_string": "additional inp A"}), ("additional_inp_B", int, {"help_string": "additional inp B"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -4015,7 +4015,7 @@ def test_shell_cmd_inputspec_outputspec_5b(): ("additional_inp_A", str, {"help_string": "additional inp A"}), ("additional_inp_B", str, {"help_string": "additional inp B"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -4067,7 +4067,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): ), ("additional_inp_A", str, {"help_string": "additional inp A"}), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_output_spec = SpecInfo( @@ -4328,7 +4328,7 @@ def change_name(file): # ("output_biascorrected", bool, # attr.ib(metadata={"help_string": 'output restored image (bias-corrected image)', "argstr": '-B'})), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # TODO: not sure why this has to be string @@ -4381,7 +4381,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_cp = ShellTask( @@ -4421,7 +4421,7 @@ def test_shell_cmd_optional_output_file2(tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_cp = ShellTask( @@ -4459,7 +4459,7 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( name="Output", @@ -4521,7 +4521,7 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( name="Output", @@ -4587,7 +4587,7 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( name="Output", @@ -4654,7 +4654,7 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( name="Output", @@ -4719,7 +4719,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( 
name="Output", @@ -4773,7 +4773,7 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) out_spec = SpecInfo( name="Output", @@ -4857,7 +4857,7 @@ def spec_info(formatter): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) def formatter_1(inputs): @@ -4970,7 +4970,7 @@ def spec_info(formatter): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # asking for specific inputs @@ -5023,7 +5023,7 @@ def test_shellcommand_error_msg(tmp_path): {"help_string": "a dummy string", "argstr": "", "mandatory": True}, ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index b75c20a8a2..e3b662af40 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -6,7 +6,7 @@ from ..task import ShellTask from pydra.engine.specs import ( ShellOutputs, - ShellSpec, + ShellDef, File, ) from pydra.design import shell @@ -38,7 +38,7 @@ def test_shell_cmd_inputs_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -54,7 +54,7 @@ def test_shell_cmd_inputs_1a(): fields=[ ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -77,7 +77,7 @@ def test_shell_cmd_inputs_1b(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -101,7 +101,7 @@ def test_shell_cmd_inputs_1_st(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) ShellTask( @@ -135,7 +135,7 @@ def test_shell_cmd_inputs_2(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -153,7 +153,7 @@ def test_shell_cmd_inputs_2a(): ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})), ("inpB", attr.ib(type=str, metadata={"help_string": "inpB", "argstr": ""})), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -187,7 +187,7 @@ def test_shell_cmd_inputs_2_err(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -220,7 +220,7 @@ def test_shell_cmd_inputs_2_noerr(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) @@ -248,7 +248,7 @@ def test_shell_cmd_inputs_3(): ), ("inpC", attr.ib(type=str, metadata={"help_string": "inpC", "argstr": ""})), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -276,7 +276,7 @@ def test_shell_cmd_inputs_argstr_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) @@ -297,7 +297,7 @@ def test_shell_cmd_inputs_argstr_2(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # separate command into exec + args @@ -321,7 +321,7 @@ def test_shell_cmd_inputs_list_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -344,7 +344,7 @@ def test_shell_cmd_inputs_list_2(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -366,7 +366,7 @@ def test_shell_cmd_inputs_list_3(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -394,7 +394,7 @@ def test_shell_cmd_inputs_list_sep_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -424,7 +424,7 @@ def test_shell_cmd_inputs_list_sep_2(): ), ) ], - 
bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -454,7 +454,7 @@ def test_shell_cmd_inputs_list_sep_2a(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -484,7 +484,7 @@ def test_shell_cmd_inputs_list_sep_3(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -514,7 +514,7 @@ def test_shell_cmd_inputs_list_sep_3a(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -544,7 +544,7 @@ def test_shell_cmd_inputs_sep_4(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA=["aaa"], input_spec=my_input_spec) @@ -569,7 +569,7 @@ def test_shell_cmd_inputs_sep_4a(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) @@ -593,7 +593,7 @@ def test_shell_cmd_inputs_format_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) @@ -617,7 +617,7 @@ def test_shell_cmd_inputs_format_2(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -645,7 +645,7 @@ def test_shell_cmd_inputs_format_3(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", inpA=0.007, input_spec=my_input_spec) @@ -670,7 +670,7 @@ def test_shell_cmd_inputs_mandatory_1(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec) @@ -714,7 +714,7 @@ def test_shell_cmd_inputs_not_given_1(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(name="shelly", executable="executable", input_spec=my_input_spec) @@ -753,7 +753,7 @@ def test_shell_cmd_inputs_template_1(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") @@ -792,7 +792,7 @@ def test_shell_cmd_inputs_template_1a(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") @@ -826,7 +826,7 @@ def test_shell_cmd_inputs_template_2(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec) @@ -904,7 +904,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -984,7 +984,7 @@ def test_shell_cmd_inputs_template_3a(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1060,7 +1060,7 @@ def test_shell_cmd_inputs_template_4(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") @@ -1087,7 +1087,7 @@ def test_shell_cmd_inputs_template_5_ex(): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, outAB="outAB") @@ -1130,7 +1130,7 @@ def test_shell_cmd_inputs_template_6(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # no input for outA (and no default value), so the output is created whenever the @@ -1191,7 +1191,7 @@ def test_shell_cmd_inputs_template_6a(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # no input for outA, but default is False, so the outA shouldn't be used @@ -1249,7 +1249,7 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / 
"a_file.txt" @@ -1298,7 +1298,7 @@ def test_shell_cmd_inputs_template_7a(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "a_file.txt" @@ -1347,7 +1347,7 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "a_file.txt" @@ -1393,7 +1393,7 @@ def test_shell_cmd_inputs_template_8(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "a_file.t" @@ -1453,7 +1453,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "inpA.t" @@ -1515,7 +1515,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "inpA.t" @@ -1577,7 +1577,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "inpA.t" @@ -1641,7 +1641,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA_file = tmp_path / "inpA.t" @@ -1689,7 +1689,7 @@ def test_shell_cmd_inputs_template_10(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA=3.3456) @@ -1701,7 +1701,7 @@ def test_shell_cmd_inputs_template_10(): def test_shell_cmd_inputs_template_requires_1(): - """Given an input specification with a templated output file subject to required fields, + """Given an input definition with a templated output file subject to required fields, ensure the field is set only when all requirements are met.""" my_input_spec = SpecInfo( @@ -1738,7 +1738,7 @@ def test_shell_cmd_inputs_template_requires_1(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) # When requirements are not met. 
@@ -1787,7 +1787,7 @@ def template_fun(inputs): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") @@ -1845,7 +1845,7 @@ def template_fun(inputs): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) shelly = ShellTask( @@ -1890,7 +1890,7 @@ def test_shell_cmd_inputs_template_1_st(): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) inpA = ["inpA_1", "inpA_2"] @@ -2085,7 +2085,7 @@ def test_shell_cmd_inputs_denoise_image( ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) my_input_file = tmp_path / "a_file.ext" @@ -2167,7 +2167,7 @@ def test_shell_cmd_inputs_denoise_image( @shell.define -class SimpleTaskXor(ShellSpec["SimpleTaskXor.Outputs"]): +class SimpleTaskXor(ShellDef["SimpleTaskXor.Outputs"]): input_1: str = shell.arg( help_string="help", diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 7eec9b01dc..247e36dfb5 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -5,7 +5,7 @@ from ..task import ShellTask from ..submitter import Submitter -from ..specs import ShellOutputs, File, ShellSpec +from ..specs import ShellOutputs, File, ShellDef from ..environments import Singularity @@ -219,7 +219,7 @@ def test_singularity_inputspec_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -260,7 +260,7 @@ def test_singularity_inputspec_1a(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -317,7 +317,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -377,7 +377,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -437,7 +437,7 @@ def test_singularity_inputspec_2a(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -494,7 +494,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): ), ), ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -550,7 +550,7 @@ def test_singularity_inputspec_state_1(tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -600,7 +600,7 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) singu = ShellTask( @@ -643,7 +643,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) @@ -699,7 +699,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) @@ -756,7 +756,7 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): ), ) ], - bases=(ShellSpec,), + bases=(ShellDef,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 6e1bf8f95e..17ce176c8a 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -10,7 +10,7 @@ File, Runtime, Result, - ShellSpec, + ShellDef, ) from pydra.engine.workflow.lazy import ( LazyInField, @@ -29,7 +29,7 @@ def test_basespec(): - spec = BaseSpec() + spec = BaseDef() assert spec.hash == "0b1d98df22ecd1733562711c205abca2" @@ -50,8 +50,8 @@ 
def test_result(): def test_shellspec(): with pytest.raises(TypeError): - spec = ShellSpec() - spec = ShellSpec(executable="ls") # (executable, args) + spec = ShellDef() + spec = ShellDef(executable="ls") # (executable, args) assert hasattr(spec, "executable") assert hasattr(spec, "args") @@ -63,7 +63,7 @@ class Input: inp_b: str = "B" def __init__(self): - class InpSpec: + class InpDef: def __init__(self): self.fields = [("inp_a", int), ("inp_b", int)] @@ -73,7 +73,7 @@ def __init__(self): self.name = "tn" self.inputs = self.Input() - self.input_spec = InpSpec() + self.input_spec = InpDef() self.output_spec = Outputs() self.output_names = ["out_a"] self.state = None @@ -138,14 +138,14 @@ def test_input_file_hash_1(tmp_path): os.chdir(tmp_path) outfile = "test.file" fields = [("in_file", ty.Any)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) + input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseDef,)) inputs = make_klass(input_spec) assert inputs(in_file=outfile).hash == "9a106eb2830850834d9b5bf098d5fa85" with open(outfile, "w") as fp: fp.write("test") fields = [("in_file", File)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) + input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseDef,)) inputs = make_klass(input_spec) assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf" @@ -156,7 +156,7 @@ def test_input_file_hash_2(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo(name="Inputs", fields=[("in_file", File)], bases=(BaseSpec,)) + input_spec = SpecInfo(name="Inputs", fields=[("in_file", File)], bases=(BaseDef,)) inputs = make_klass(input_spec) # checking specific hash value @@ -186,7 +186,7 @@ def test_input_file_hash_2a(tmp_path): f.write("hello") input_spec = SpecInfo( - name="Inputs", fields=[("in_file", ty.Union[File, int])], bases=(BaseSpec,) + name="Inputs", fields=[("in_file", ty.Union[File, int])], bases=(BaseDef,) ) inputs = make_klass(input_spec) @@ -221,7 +221,7 @@ def test_input_file_hash_3(tmp_path): f.write("hello") input_spec = SpecInfo( - name="Inputs", fields=[("in_file", File), ("in_int", int)], bases=(BaseSpec,) + name="Inputs", fields=[("in_file", File), ("in_int", int)], bases=(BaseDef,) ) inputs = make_klass(input_spec) @@ -279,7 +279,7 @@ def test_input_file_hash_4(tmp_path): input_spec = SpecInfo( name="Inputs", fields=[("in_file", ty.List[ty.List[ty.Union[int, File]]])], - bases=(BaseSpec,), + bases=(BaseDef,), ) inputs = make_klass(input_spec) @@ -316,7 +316,7 @@ def test_input_file_hash_5(tmp_path): input_spec = SpecInfo( name="Inputs", fields=[("in_file", ty.List[ty.Dict[ty.Any, ty.Union[File, int]]])], - bases=(BaseSpec,), + bases=(BaseDef,), ) inputs = make_klass(input_spec) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index acea1497ad..5a7e0d6311 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -17,7 +17,7 @@ MultiOutputObj, ) from ..specs import ( - ShellSpec, + ShellDef, File, ) from pydra.utils.hash import hash_function @@ -592,7 +592,7 @@ def testfunc(a): my_input_spec = SpecInfo( name="Input", fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=3.5, input_spec=my_input_spec) @@ -611,7 +611,7 @@ def testfunc(a): my_input_spec = SpecInfo( name="Input", fields=[("a", attr.ib(type=int, metadata={"help_string": "input a"}))], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) 
with pytest.raises(TypeError): testfunc(a=3.5, input_spec=my_input_spec) @@ -634,7 +634,7 @@ def testfunc(a): attr.ib(type=float, metadata={"position": 1, "help_string": "input a"}), ) ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) with pytest.raises(AttributeError, match="only these keys are supported"): testfunc(a=3.5, input_spec=my_input_spec) @@ -649,7 +649,7 @@ def test_input_spec_func_1d_except(): def testfunc(a): return a - my_input_spec = SpecInfo(name="Input", fields=[], bases=(FunctionSpec,)) + my_input_spec = SpecInfo(name="Input", fields=[], bases=(FunctionDef,)) funky = testfunc(a=3.5, input_spec=my_input_spec) with pytest.raises(TypeError, match="missing 1 required positional argument"): funky() @@ -667,7 +667,7 @@ def testfunc(a: int): my_input_spec = SpecInfo( name="Input", fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=3.5, input_spec=my_input_spec) @@ -687,7 +687,7 @@ def testfunc(a: int): my_input_spec = SpecInfo( name="Input", fields=[("a", float, {"help_string": "input a"})], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=3.5, input_spec=my_input_spec) @@ -714,7 +714,7 @@ def testfunc(a): ), ) ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=2, input_spec=my_input_spec) @@ -741,7 +741,7 @@ def testfunc(a): ), ) ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) with pytest.raises(ValueError, match="value of a has to be"): @@ -773,7 +773,7 @@ def testfunc(a, b=1): ), ), ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=2, input_spec=my_input_spec) @@ -801,7 +801,7 @@ def testfunc(a, b=1): ), ("b", attr.ib(type=int, default=10, metadata={"help_string": "input b"})), ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=2, input_spec=my_input_spec) @@ -823,7 +823,7 @@ def testfunc(a): fields=[ ("a", attr.ib(type=MultiInputObj, metadata={"help_string": "input a"})) ], - bases=(FunctionSpec,), + bases=(FunctionDef,), ) funky = testfunc(a=3.5, input_spec=my_input_spec) @@ -842,7 +842,7 @@ def testfunc(a): my_output_spec = SpecInfo( name="Output", fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, output_spec=my_output_spec) @@ -862,7 +862,7 @@ def testfunc(a): my_output_spec = SpecInfo( name="Output", fields=[("out1", attr.ib(type=int, metadata={"help_string": "output"}))], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, output_spec=my_output_spec) @@ -882,7 +882,7 @@ def testfunc(a) -> int: my_output_spec = SpecInfo( name="Output", fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, output_spec=my_output_spec) @@ -903,7 +903,7 @@ def testfunc(a) -> int: my_output_spec = SpecInfo( name="Output", fields=[("out1", float, {"help_string": "output"})], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, output_spec=my_output_spec) @@ -928,7 +928,7 @@ def testfunc(a, b): attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), ) ], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, b=1, output_spec=my_output_spec) @@ -953,7 +953,7 @@ def testfunc(a): attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), ) ], - bases=(BaseSpec,), + bases=(BaseDef,), ) funky = testfunc(a=3.5, output_spec=my_output_spec) @@ -1135,7 +1135,7 @@ def 
test_audit_shellcommandtask_file(tmp_path):                 ),             ),         ], -        bases=(ShellSpec,), +        bases=(ShellDef,),     )     shelly = ShellTask(         name="shelly", diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index cfbaa4ad6f..9384e1de60 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -35,7 +35,7 @@ ) from ..submitter import Submitter from pydra.design import python -from ..specs import ShellSpec +from ..specs import ShellDef from pydra.utils import exc_info_matches  @@ -51,7 +51,7 @@ def test_wf_specinfo_input_spec():             ("a", str, "", {"mandatory": True}),             ("b", dict, {"foo": 1, "bar": False}, {"mandatory": False}),         ], -        bases=(BaseSpec,), +        bases=(BaseDef,),     )     wf = Workflow(         name="workflow", @@ -66,10 +66,10 @@ def test_wf_specinfo_input_spec():         fields=[             ("a", str, {"mandatory": True}),         ], -        bases=(ShellSpec,), +        bases=(ShellDef,),     )     with pytest.raises( -        ValueError, match="Provided SpecInfo must have BaseSpec as its base." +        ValueError, match="Provided SpecInfo must have BaseDef as its base."     ):         Workflow(name="workflow", input_spec=bad_input_spec)  @@ -243,7 +243,7 @@ def test_wf_1_call_exception(plugin, tmpdir):     with Submitter(plugin=plugin) as sub:         with pytest.raises(Exception) as e:             wf(submitter=sub, plugin=plugin) -    assert "Specify submitter OR plugin" in str(e.value) +    assert "Specify submitter OR plugin" in str(e.value)   def test_wf_1_inp_in_call(tmpdir): diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index e6bf3cced7..8dd09e0849 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -941,10 +941,10 @@ def make_spec(self, cmd=None, arg=None):          Returns         ------- -        psij.JobSpec +        psij.JobSpec             PSI/J job specification.          """ -        spec = self.psij.JobSpec() +        spec = self.psij.JobSpec()         spec.executable = cmd         spec.arguments = arg  @@ -956,7 +956,7 @@ def make_job(self, spec, attributes):          Parameters         ---------- -        spec : psij.JobSpec +        spec : psij.JobSpec             PSI/J job specification.         attributes : any             Job attributes. 
diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index cf649ebd4a..ebe6835311 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -4,7 +4,7 @@ from typing_extensions import Self import attrs from pydra.engine.helpers import list_fields, attrs_values, is_lazy -from pydra.engine.specs import TaskSpec, TaskOutputs, WorkflowOutputs +from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowOutputs from .lazy import LazyInField from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray @@ -17,29 +17,29 @@ @attrs.define(auto_attribs=False) class Workflow(ty.Generic[WorkflowOutputsType]): - """A workflow, constructed from a workflow specification + """A workflow, constructed from a workflow definition Parameters ---------- name : str The name of the workflow - inputs : TaskSpec - The input specification of the workflow - outputs : TaskSpec - The output specification of the workflow + inputs : TaskDef + The input definition of the workflow + outputs : TaskDef + The output definition of the workflow """ name: str = attrs.field() - inputs: TaskSpec[WorkflowOutputsType] = attrs.field() + inputs: TaskDef[WorkflowOutputsType] = attrs.field() outputs: WorkflowOutputsType = attrs.field() _nodes: dict[str, Node] = attrs.field(factory=dict) @classmethod def construct( cls, - spec: TaskSpec[WorkflowOutputsType], + spec: TaskDef[WorkflowOutputsType], ) -> Self: - """Construct a workflow from a specification, caching the constructed worklow""" + """Construct a workflow from a definition, caching the constructed worklow""" lazy_inputs = [f for f in list_fields(type(spec)) if f.lazy] @@ -129,21 +129,21 @@ def clear_cache(cls): """Clear the cache of constructed workflows""" cls._constructed.clear() - def add(self, task_spec: TaskSpec[OutputsType], name=None) -> OutputsType: + def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: """Add a node to the workflow Parameters ---------- - task_spec : TaskSpec - The specification of the task to add to the workflow as a node + task_spec : TaskDef + The definition of the task to add to the workflow as a node name : str, optional - The name of the node, by default it will be the name of the task specification + The name of the node, by default it will be the name of the task definition class Returns ------- OutputType - The outputs specification of the node + The outputs definition of the node """ if name is None: name = type(task_spec).__name__ diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 2a3daef980..2920e5a07e 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -5,7 +5,7 @@ import attrs from pydra.utils.typing import TypeParser, StateArray from . 
import lazy -from ..specs import TaskSpec, TaskOutputs, WorkflowSpec +from ..specs import TaskDef, TaskOutputs, WorkflowDef from ..task import Task from ..helpers import ensure_list, attrs_values, is_lazy, load_result, create_checksum from pydra.utils.hash import hash_function @@ -32,12 +32,12 @@ class Node(ty.Generic[OutputType]):     ----------     name : str         The name of the node -    inputs : TaskSpec -        The specification of the node +    inputs : TaskDef +        The definition of the node     """      name: str -    _spec: TaskSpec[OutputType] +    _spec: TaskDef[OutputType]     _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False)     _lzout: OutputType | None = attrs.field(         init=False, default=None, eq=False, hash=False @@ -148,7 +148,7 @@ def split(          Returns         ------- -        self : TaskSpec +        self : TaskDef             a reference to the task         """         self._check_if_outputs_have_been_used("the node cannot be split or combined") @@ -222,7 +222,7 @@ def combine(          Returns         ------- -        self : TaskSpec +        self : TaskDef             a reference to the task         """         if not isinstance(combiner, (str, list)): @@ -345,7 +345,7 @@ def _checksum_states(self, state_index=None):         # that might be important for outer splitter of input variable with big files         # the file can be changed with every single index even if there are only two files         input_hash = inputs_copy.hash -        if isinstance(self._spec, WorkflowSpec): +        if isinstance(self._spec, WorkflowDef):             con_hash = hash_function(self._connections)             # TODO: hash list is not used             hash_list = [input_hash, con_hash]  # noqa: F841 diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 2cd5ad357e..8bf9932923 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -31,7 +31,7 @@ def generic_func_task(in_file: File) -> File:  @shell.define -class GenericShellTask(specs.ShellSpec["GenericShellTask.Outputs"]): +class GenericShellTask(specs.ShellDef["GenericShellTask.Outputs"]):     """class with customized input and executables"""      in_file: File = shell.arg( @@ -57,7 +57,7 @@ def specific_func_task(in_file: MyFormatX) -> MyFormatX:  @shell.define -class SpecificShellTask(specs.ShellSpec["SpecificShellTask.Outputs"]): +class SpecificShellTask(specs.ShellDef["SpecificShellTask.Outputs"]):     executable = "echo"      in_file: MyFormatX = shell.arg( diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 2ce2efd1ff..976f59b431 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -409,7 +409,7 @@ def coerce_obj(obj, type_):         try:             return expand_and_coerce(object_, self.pattern)         except TypeError as e: -            # Special handling for MultiInputObjects (which are annoying) +            # Special handling for MultiInputObjects (which are annoying)             if isinstance(self.pattern, tuple) and self.pattern[0] == MultiInputObj:                 # Attempt to coerce the object into arg type of the MultiInputObj first,                 # and if that fails, try to coerce it into a list of the arg type @@ -588,7 +588,7 @@ def check_sequence(tp_args, pattern_args):         try:             return expand_and_check(type_, self.pattern)         except TypeError as e: -            # Special handling for MultiInputObjects (which are annoying) +            # Special handling for MultiInputObjects (which are annoying)             if not isinstance(self.pattern, tuple) or self.pattern[0] != MultiInputObj:                 raise e             # Attempt to coerce the object into arg type of the MultiInputObj first, From e317e8ef896fdcf3406b791630192e6e8cc82398 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 22:27:43 +1100 Subject: [PATCH 110/342] finished getting-started (apart from debugging)  tutorial  --- .../source/tutorial/getting-started.ipynb     | 
111 ++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 1e91d0960a..4ae22d0ed4 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -6,15 +6,18 @@ "source": [ "# Getting started\n", "\n", - "## Running your first task\n", - "\n", "The basic runnable component of Pydra is a *task*. Tasks are conceptually similar to\n", - "functions, in that they take inputs, process them and then return results. However,\n", + "functions, in that they take inputs, operate on them and then return results. However,\n", "unlike functions, tasks are parameterised before they are executed in a separate step.\n", "This enables parameterised tasks to be linked together into workflows that are checked for\n", "errors before they are executed, and modular execution workers and environments to specified\n", "independently of the task being performed.\n", "\n", + "Tasks can encapsulate Python functions, shell-commands or workflows constructed from\n", + "task components.\n", + "\n", + "## Running your first task\n", + "\n", "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). Pre-define task definitions are run by\n", "\n", @@ -22,12 +25,12 @@ "* instantiate the class with the parameters of the task\n", "* \"call\" resulting object to execute it as you would a function (i.e. with the `my_task(...)`)\n", "\n", - "To demonstrate with a toy example, of loading a JSON file with the `pydra.tasks.common.LoadJson` task, this we first create an example JSON file" + "To demonstrate with a toy example of loading a JSON file with the `pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +38,7 @@ "from tempfile import mkdtemp\n", "import json\n", "\n", - "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n", + "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", "\n", "test_dir = Path(mkdtemp())\n", "json_file = test_dir / \"test.json\"\n", @@ -70,6 +73,19 @@ "assert result.output.out == JSON_CONTENTS" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -143,14 +159,14 @@ "# as the number of NIfTI files\n", "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", "\n", - "mrgrid_varying_sizes = MrGrid().split(\n", + "mrgrid_varying_vox_sizes = MrGrid().split(\n", " (\"input\", \"voxel\"),\n", " input=nifti_dir.iterdir(),\n", " voxel=VOXEL_SIZES\n", ")\n", "\n", "# Run the task to resample all NIfTI files with different voxel sizes\n", - "result = mrgrid()" + "result = mrgrid_varying_vox_sizes(cache_dir=test_dir / \"cache\")" ] }, { @@ -159,7 +175,84 @@ "source": [ "## Cache directories\n", "\n", - "When a task runs, a hash is generated by the combination of all the 
inputs to the task and the task to be run." + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the output directory for\n", + "the task within the specified cache directory. Therefore, if you use the same cache\n", + "directory between runs and in a subsequent run the same task is executed with the same\n", + "inputs then the location of its output directory will also be the same, and the outputs\n", + "generated by the previous run are reused." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result1 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result1.output_dir == result.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result2 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result2.output_dir != result.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", + "\n", + "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result3 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result3.output_dir == result.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging\n", + "\n", + "Work in progress..." 
] }, { From 720efdba3adbca9b7510bd241c8b39b8c5a81e84 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 23:18:36 +1100 Subject: [PATCH 111/342] imported examples from tutorials --- .../source/examples/first-level-glm.ipynb | 760 ++++++++++ new-docs/source/examples/two-level-glm.ipynb | 1241 +++++++++++++++++ new-docs/source/index.rst | 2 + .../source/tutorial/getting-started.ipynb | 111 +- pyproject.toml | 2 +- 5 files changed, 2013 insertions(+), 103 deletions(-) create mode 100644 new-docs/source/examples/first-level-glm.ipynb create mode 100644 new-docs/source/examples/two-level-glm.ipynb diff --git a/new-docs/source/examples/first-level-glm.ipynb b/new-docs/source/examples/first-level-glm.ipynb new file mode 100644 index 0000000000..a99dad8458 --- /dev/null +++ b/new-docs/source/examples/first-level-glm.ipynb @@ -0,0 +1,760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c8149a94", + "metadata": {}, + "source": [ + "# First Level GLM (from Nilearn)" + ] + }, + { + "cell_type": "markdown", + "id": "b54b132a", + "metadata": {}, + "source": [ + "In this tutorial, we will go through a simple workflow of the first level general linear modeling with a BIDS dataset from openneuro. This analysis is only performed on **one** subject.\n", + "\n", + "This tutorial is based on the [Nilearn GLM tutorial](https://nilearn.github.io/stable/auto_examples/04_glm_first_level/plot_bids_features.html#sphx-glr-auto-examples-04-glm-first-level-plot-bids-features-py)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f514ffe", + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "8313a041", + "metadata": {}, + "source": [ + "## Preparation\n", + "\n", + "Import packages that will be used globally and set up output directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72d1dfdd", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "import sys \n", + "if not sys.warnoptions:\n", + " warnings.simplefilter(\"ignore\")\n", + " \n", + "import os\n", + "import typing as ty\n", + "from pathlib import Path\n", + "\n", + "from pydra.design import python, workflow\n", + "from fileformats.generic import File, Directory\n", + "from fileforamts.text import Csv\n", + "import pandas as pd\n", + "from scipy.stats import norm\n", + "\n", + "import nibabel as nib\n", + "from nilearn.datasets import (\n", + " fetch_openneuro_dataset_index,\n", + " fetch_openneuro_dataset,\n", + " select_from_index,\n", + " )\n", + "from nilearn.interfaces.fsl import get_design_from_fslmat\n", + "from nilearn.glm.first_level import first_level_from_bids\n", + "from nilearn.reporting import get_clusters_table, make_glm_report\n", + "from nilearn.plotting import (\n", + " plot_glass_brain,\n", + " plot_img_comparison,\n", + " plot_contrast_matrix,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5716cb50", + "metadata": {}, + "outputs": [], + "source": [ + "# get current directory\n", + "pydra_tutorial_dir = os.path.dirname(os.getcwd())\n", + "\n", + "# set up output directory\n", + "workflow_dir = Path(pydra_tutorial_dir) / 'outputs'\n", + "workflow_out_dir = workflow_dir / '6_glm'\n", + "\n", + "# create the output directory if not exit\n", + "os.makedirs(workflow_out_dir, exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1878928b", + "metadata": {}, + "outputs": [], + "source": [ + 
"workflow_out_dir" + ] + }, + { + "cell_type": "markdown", + "id": "6cafd6a1", + "metadata": {}, + "source": [ + "## Create tasks\n", + "\n", + "In this section, we converte major steps into tasks.\n", + "Each pydra task can have multiple python functions. We recommend to put those logically more related functions into the same task.\n", + "\n", + "It is very **important** to keep in mind what adjacent tasks of your current task will be.\n", + "1. Your previous task will decide your arguments in the current task\n", + "2. Your next task will be impacted by the returns in the current task" + ] + }, + { + "cell_type": "markdown", + "id": "823780ab", + "metadata": {}, + "source": [ + "### fetch openneuro BIDS dataset\n", + "\n", + "In this task, we do the following:\n", + "1. get openneuro dataset index\n", + "2. specify exclusion patterns and number of subjects\n", + "3. download the data we need\n", + "\n", + "\n", + "**Notes:** Here we still use `n_subjects` as an argument. Given that we will only analyze one subject, you can also remove this argument and specify `n_subjects =1` in `select_from_index`. If you do, do not forget to modify the argument in the workflow later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2ab134c", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"data_dir\"])\n", + "def GetOpenneuroDataset(exclusion_patterns: list, n_subjects: int) -> str:\n", + " _, urls = fetch_openneuro_dataset_index()\n", + " urls = select_from_index(\n", + " urls, exclusion_filters=exclusion_patterns, n_subjects=n_subjects\n", + " )\n", + " data_dir, _ = fetch_openneuro_dataset(urls=urls)\n", + " return data_dir" + ] + }, + { + "cell_type": "markdown", + "id": "1b4899de", + "metadata": {}, + "source": [ + "### obtain FirstLevelModel objects automatically and fit arguments\n", + "\n", + "To get the first level model(s) we have to specify\n", + "1. the dataset directory\n", + "2. the task_label\n", + "3. the space_label\n", + "4. the folder with the desired derivatives (fMRIPrep)\n", + "\n", + "In our case, we only have one subject so we will only have one first level model.\n", + "Then, for this model, we will obtain\n", + "1. the list of run images\n", + "2. events\n", + "3. confound regressors\n", + "\n", + "Those are inferred from the confounds.tsv files available in the BIDS dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c2710dc", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"model\", \"imgs\", \"subject\"])\n", + "def GetInfoFromBids(\n", + " data_dir: Directory,\n", + " task_label: str,\n", + " space_label: str,\n", + " smoothing_fwhm: float,\n", + " derivatives_folder: Directory,\n", + ") -> ty.Tuple[ty.Any, list, str]:\n", + " (\n", + " models,\n", + " models_run_imgs,\n", + " models_events,\n", + " models_confounds,\n", + " ) = first_level_from_bids(\n", + " dataset_path=data_dir,\n", + " task_label=task_label,\n", + " space_label=space_label,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " derivatives_folder=derivatives_folder,\n", + " )\n", + " model, imgs, events, confounds = (\n", + " models[0],\n", + " models_run_imgs[0],\n", + " models_events[0],\n", + " models_confounds[0],\n", + " )\n", + " subject = 'sub-' + model.subject_label\n", + " return model, imgs, subject" + ] + }, + { + "cell_type": "markdown", + "id": "e5af99cb", + "metadata": {}, + "source": [ + "### Get design matrix\n", + "\n", + "This task does the following:\n", + "1. 
read the design matrix in `.mat`\n", + "2. rename the column\n", + "3. save the new design matrix as `.csv`\n", + "\n", + "**Think:** What if we don't save the new design matrix, but `return` it directly? In other words, we `return` a `pandas.DataFrame` instead of a `path`. What will happen? Worth a try :)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2bdfcfd9", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"dm_path\"])\n", + "def GetDesignMatrix(data_dir: Directory, subject: str) -> Csv:\n", + " fsl_design_matrix_path = data_dir.joinpath(\n", + " 'derivatives',\n", + " 'task',\n", + " subject,\n", + " 'stopsignal.feat',\n", + " 'design.mat',\n", + " )\n", + " design_matrix = get_design_from_fslmat(\n", + " fsl_design_matrix_path, column_names=None\n", + " )\n", + "\n", + " design_columns = [\n", + " 'cond_%02d' % i for i in range(len(design_matrix.columns))\n", + " ]\n", + " design_columns[0] = 'Go'\n", + " design_columns[4] = 'StopSuccess'\n", + " design_matrix.columns = design_columns\n", + " dm_path = Path('designmatrix.csv')\n", + " design_matrix.to_csv(dm_path, index=None)\n", + " return dm_path" + ] + }, + { + "cell_type": "markdown", + "id": "e1cb37d0", + "metadata": {}, + "source": [ + "### Fit the first level model\n", + "\n", + "What we are doing here is:\n", + "1. use the design matrix to fit the first level model\n", + "2. compute the contrast\n", + "3. save the z_map and masker for further use\n", + "4. generate a glm report (HTML file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65cec504", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"model\", \"z_map_path\", \"masker\", \"glm_report_file\"])\n", + "def ModelFit(model, imgs, dm_path, contrast: str) -> ty.Tuple[ty.Any, str, ty.Any, str]:\n", + " design_matrix = pd.read_csv(dm_path)\n", + " model.fit(imgs, design_matrices=[design_matrix])\n", + " z_map = model.compute_contrast(contrast)\n", + " z_map_path = Path('firstlevel_z_map.nii.gz')\n", + " z_map.to_filename(z_map_path)\n", + " masker_path = Path('firstlevel_masker.nii.gz')\n", + " masker = model.masker_\n", + " glm_report_file = Path('glm_report.html')\n", + " report = make_glm_report(model, contrast)\n", + " report.save_as_html(glm_report_file)\n", + " return model, z_map_path, masker, glm_report_file" + ] + }, + { + "cell_type": "markdown", + "id": "05576ba4", + "metadata": {}, + "source": [ + "### Get cluster table\n", + "\n", + "For publication purposes, we obtain a cluster table." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4a86a6f", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"output_file\"])\n", + "def ClusterTable(z_map_path: File) -> Csv:\n", + " stat_img = nib.load(z_map_path)\n", + " output_file = Path('cluster_table.csv')\n", + " df = get_clusters_table(\n", + " stat_img, stat_threshold=norm.isf(0.001), cluster_threshold=10\n", + " )\n", + " df.to_csv(output_file, index=None)\n", + " return output_file" + ] + }, + { + "cell_type": "markdown", + "id": "c1e8effd", + "metadata": {}, + "source": [ + "### Make plots\n", + "\n", + "Here we want to make some plots to display our results and compare the result from FSL.\n", + "1. plot nilearn z-map\n", + "2. plot fsl z-map\n", + "3. plot nilearn and fsl comparison\n", + "4. plot design matrix contrast\n", + "\n", + "You can also separate this task into multiple sub-tasks. 
But it makes more sense to put them into one task as they use the same files and function `nilearn.plotting` repeatedly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0f78107", + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"output_file1\", \"output_file2\", \"output_file3\", \"output_file4\"])\n", + "def Plots(\n", + " data_dir: Directory,\n", + " dm_path: File,\n", + " z_map_path: File,\n", + " contrast: str,\n", + " subject: str,\n", + " masker\n", + ") -> ty.Tuple[str, str, str, str]:\n", + " # plot and save nilearn z-map\n", + " z_map = nib.load(z_map_path)\n", + " output_file1 = Path('nilearn_z_map.jpg')\n", + " plot_glass_brain(\n", + " z_map,\n", + " output_file=output_file1,\n", + " colorbar=True,\n", + " threshold=norm.isf(0.001),\n", + " title='Nilearn Z map of \"StopSuccess - Go\" (unc p<0.001)',\n", + " plot_abs=False,\n", + " display_mode='ortho',\n", + " )\n", + "\n", + " # plot and save fsl z-map\n", + " fsl_z_map = nib.load(\n", + " os.path.join(\n", + " data_dir,\n", + " 'derivatives',\n", + " 'task',\n", + " subject,\n", + " 'stopsignal.feat',\n", + " 'stats',\n", + " 'zstat12.nii.gz',\n", + " )\n", + " )\n", + " output_file2 = Path('fsl_z_map.jpg')\n", + " plot_glass_brain(\n", + " fsl_z_map,\n", + " output_file=output_file2,\n", + " colorbar=True,\n", + " threshold=norm.isf(0.001),\n", + " title='FSL Z map of \"StopSuccess - Go\" (unc p<0.001)',\n", + " plot_abs=False,\n", + " display_mode='ortho',\n", + " )\n", + "\n", + " # plot and save nilearn and fsl comparison\n", + " plot_img_comparison(\n", + " [z_map],\n", + " [fsl_z_map],\n", + " masker,\n", + " output_dir=workflow_out_dir,\n", + " ref_label='Nilearn',\n", + " src_label='FSL',\n", + " )\n", + " old = Path('0000.png')\n", + " new = Path('nilearn_fsl_comp.jpg')\n", + " os.rename(old, new)\n", + " output_file3 = new\n", + " print(output_file3)\n", + "\n", + " # plot and save design matrix contrast\n", + " design_matrix = pd.read_csv(dm_path)\n", + " output_file4 = Path('firstlevel_contrast.jpg')\n", + " plot_contrast_matrix(contrast, design_matrix, output_file=output_file4)\n", + " return output_file1, output_file2, output_file3, output_file4" + ] + }, + { + "cell_type": "markdown", + "id": "12a99b96", + "metadata": {}, + "source": [ + "## Make a workflow from tasks\n", + "\n", + "Now we have created all tasks we need for this first level analysis, and there are two choices for our next step.\n", + "1. create one workflow to connect all tasks together\n", + "2. create sub-workflows with some closely related tasks, and connect these workflows along with other tasks into a larger workflow.\n", + "\n", + "We recommend the second approach as it is always a good practice to group tasks, especially when there are a large number of tasks in the analysis.\n", + "\n", + "Our analysis can be divided into three parts: (1) get/read the data, (2) analyze the data, and (3) plot the result, where (1) and (3) only have one task each. So we can put all tasks in (2) into one workflow and name it as `firstlevel` or whatever you prefer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e79e9b1", + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs=[\"z_map\", \"masker\", \"subject\", \"dm_path\", \"cluster_table\", \"glm_report\"])\n", + "def FirstLevelWorkflow(\n", + " data_dir: Directory,\n", + " contrast: str,\n", + " output_dir: Path,\n", + " task_label: str = 'stopsignal',\n", + " space_label: str = 'MNI152NLin2009cAsym',\n", + " derivatives_folder: str = 'derivatives/fmriprep',\n", + " smoothing_fwhm: float = 5.0,\n", + ") -> ty.Tuple[str, str, str, File, str, str]:\n", + "\n", + " # add task - get_info_from_bids\n", + " get_info_from_bids = workflow.add(\n", + " GetInfoFromBids(\n", + " data_dir=data_dir,\n", + " task_label=task_label,\n", + " space_label=space_label,\n", + " derivatives_folder=derivatives_folder,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " )\n", + " )\n", + " # add task - get_designmatrix\n", + " get_designmatrix = workflow.add(\n", + " GetDesignMatrix(\n", + " data_dir=data_dir,\n", + " subject=get_info_from_bids.subject,\n", + " )\n", + " )\n", + " l1estimation = workflow.add(\n", + " ModelFit(\n", + " model=get_info_from_bids.model,\n", + " imgs=get_info_from_bids.imgs,\n", + " dm_path=get_designmatrix.dm_path,\n", + " contrast=contrast,\n", + " )\n", + " )\n", + " # add task - cluster_table\n", + " cluster_table = workflow.add(\n", + " ClusterTable(\n", + " z_map_path=l1estimation.z_map_path,\n", + " )\n", + " )\n", + " # specify output\n", + " return (\n", + " l1estimation.z_map_path,\n", + " l1estimation.masker,\n", + " get_info_from_bids.subject,\n", + " get_designmatrix.dm_path,\n", + " cluster_table.output_file,\n", + " l1estimation.glm_report_file,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "657690ea", + "metadata": {}, + "source": [ + "## The overaching workflow\n", + "\n", + "Connect other tasks and the above workflow into one\n", + "\n", + "Now we need to create the overaching glm workflow that connects the above workflow and other tasks (e.g., `get/read the data` and `plot the result`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d055c5d0", + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs=[\"output1\", \"output2\", \"output3\", \"output4\"])\n", + "def FullWorkflow(\n", + " output_dir: Path,\n", + " n_subjects: int = 1,\n", + " contrast: str = 'StopSuccess - Go',\n", + " exclusion_patterns: list[str] | None = None,\n", + ") -> tuple[ty.Any, ty.Any, ty.Any, ty.Any]:\n", + " if exclusion_patterns is None:\n", + " exclusion_patterns = [\n", + " '*group*',\n", + " '*phenotype*',\n", + " '*mriqc*',\n", + " '*parameter_plots*',\n", + " '*physio_plots*',\n", + " '*space-fsaverage*',\n", + " '*space-T1w*',\n", + " '*dwi*',\n", + " '*beh*',\n", + " '*task-bart*',\n", + " '*task-rest*',\n", + " '*task-scap*',\n", + " '*task-task*',\n", + " ]\n", + "\n", + " get_openneuro_dataset = workflow.add(\n", + " GetOpenneuroDataset(\n", + " exclusion_patterns=exclusion_patterns,\n", + " n_subjects=n_subjects,\n", + " )\n", + " )\n", + "\n", + " wf_firstlevel = workflow.add(\n", + " FirstLevelWorkflow(\n", + " data_dir=get_openneuro_dataset.data_dir,\n", + " contrast=contrast,\n", + " output_dir=output_dir,\n", + " )\n", + " )\n", + "\n", + " plots = workflow.add(\n", + " Plots(\n", + " data_dir=get_openneuro_dataset.data_dir,\n", + " dm_path=wf_firstlevel.dm_path,\n", + " z_map_path=wf_firstlevel.z_map,\n", + " contrast=contrast,\n", + " subject=wf_firstlevel.subject,\n", + " 
masker=wf_firstlevel.masker,\n", + " )\n", + " )\n", + "\n", + " return (\n", + " plots.output_file1,\n", + " plots.output_file2,\n", + " plots.output_file3,\n", + " plots.output_file4,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "1b2e9a46", + "metadata": {}, + "source": [ + "## Run Workflow Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a90088e", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "wf = FullWorkflow(output_dir=workflow_out_dir, n_subjects=1, contrast='StopSuccess - Go')\n", + "\n", + "results = wf(plugin='cf', n_procs=4)\n", + "\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "id": "f540cdd4", + "metadata": {}, + "source": [ + "## Visualization" + ] + }, + { + "cell_type": "markdown", + "id": "e8def869", + "metadata": {}, + "source": [ + "If you arrive here without any errors, yay, you just made your first pydra workflow for a first-level GLM!" + ] + }, + { + "cell_type": "markdown", + "id": "9b0585e3", + "metadata": {}, + "source": [ + "## Examine folder structure\n", + "\n", + "Let's take a look at what you have got." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75c1cfc9", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "!ls ../outputs/6_glm" + ] + }, + { + "cell_type": "markdown", + "id": "56aeee0c", + "metadata": {}, + "source": [ + "### Plot figures" + ] + }, + { + "cell_type": "markdown", + "id": "dad22ca7", + "metadata": {}, + "source": [ + "#### First level contrast" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f657571", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "\n", + "Image(filename='../outputs/6_glm/firstlevel_contrast.jpg')" + ] + }, + { + "cell_type": "markdown", + "id": "0cdfcc29", + "metadata": {}, + "source": [ + "#### Nilearn Z map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f08aa59f", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "Image(filename='../outputs/6_glm/nilearn_z_map.jpg')" + ] + }, + { + "cell_type": "markdown", + "id": "ca1b896f", + "metadata": {}, + "source": [ + "#### FSL Z map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d18b6ed", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "Image(filename='../outputs/6_glm/fsl_z_map.jpg')" + ] + }, + { + "cell_type": "markdown", + "id": "fc68e7dc", + "metadata": {}, + "source": [ + "#### Nilearn FSL comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a37679ff", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "Image(filename='../outputs/6_glm/nilearn_fsl_comp.jpg')" + ] + }, + { + "cell_type": "markdown", + "id": "081bf13a", + "metadata": {}, + "source": [ + "## Exercise" + ] + }, + { + "cell_type": "markdown", + "id": "a3d55272", + "metadata": {}, + "source": [ + "What if we need to run the first-level GLM on multiple subject? We will need the `splitter`.\n", + "\n", + "So, where should we add `.split`?" 
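One possible answer, as a sketch only (the `subject_label` input and the hard-coded label list below are assumptions, not code from the workflow above): since every step downstream of the BIDS query is driven by a single subject, a natural place is to split the first-level sub-workflow over a list of subject labels when it is added to the overarching workflow, much as the `MrGrid` task was split over input images in the getting-started tutorial.

```python
# Sketch only: a hypothetical variant of the FullWorkflow body in which the
# first-level sub-workflow is split over subjects. It assumes FirstLevelWorkflow
# has been extended with a `subject_label` input that restricts the BIDS query
# to a single subject.
wf_firstlevel = workflow.add(
    FirstLevelWorkflow(
        data_dir=get_openneuro_dataset.data_dir,
        contrast=contrast,
        output_dir=output_dir,
    ).split(subject_label=["01", "02", "03"])  # one first-level analysis per subject
)
```

The downstream `Plots` task would then either be split the same way or combined over `subject_label` before plotting.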
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/new-docs/source/examples/two-level-glm.ipynb b/new-docs/source/examples/two-level-glm.ipynb new file mode 100644 index 0000000000..c21efbb25c --- /dev/null +++ b/new-docs/source/examples/two-level-glm.ipynb @@ -0,0 +1,1241 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "058a991d", + "metadata": {}, + "source": [ + "# Two-Level GLM (from Nilearn)" + ] + }, + { + "cell_type": "markdown", + "id": "2b4f98e0", + "metadata": {}, + "source": [ + "In this tutorial, we demonstrate how to write pydra tasks for the first level (subject-level) GLM and the second level (group-level) GLM in Nilearn. We use the data from [Balloon Analog Risk-taking Task](https://openneuro.org/datasets/ds000001/versions/1.0.0). \n", + "Basic information about this dataset:\n", + "- 16 subjects\n", + "- 3 runs\n", + "- functional scan TR: 2.3 \n", + "- num of functional scan: 300" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b047e37b", + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "897522ee", + "metadata": {}, + "source": [ + "## Preparation\n", + "\n", + "Import packages that will be used globally and set up output directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c4743db", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "import sys \n", + "if not sys.warnoptions:\n", + " warnings.simplefilter(\"ignore\")\n", + "\n", + "import os, glob\n", + "import datetime\n", + "import random\n", + "import pydra\n", + "from pydra import Workflow\n", + "from pydra.engine.specs import File, MultiInputFile, MultiOutputFile\n", + "import typing as ty\n", + "from pathlib import Path\n", + "import datalad.api as dl\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import nibabel as nib\n", + "from scipy.stats import norm\n", + "from nilearn.interfaces.fmriprep import load_confounds_strategy\n", + "from nilearn.image import load_img, get_data, math_img, threshold_img\n", + "from nilearn.glm.first_level import make_first_level_design_matrix, FirstLevelModel\n", + "from nilearn.glm.second_level import SecondLevelModel, non_parametric_inference\n", + "from nilearn.glm.contrasts import compute_fixed_effects\n", + "from nilearn.plotting import plot_stat_map, plot_glass_brain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "270ec541", + "metadata": {}, + "outputs": [], + "source": [ + "# get current directory\n", + "pydra_tutorial_dir = os.path.dirname(os.getcwd())\n", + "\n", + "# set up output directory\n", + "workflow_dir = Path(pydra_tutorial_dir) / 'outputs'\n", + "workflow_out_dir = workflow_dir / '9_glm' /'results'\n", + "\n", + "# create folders if not exit\n", + "os.makedirs(workflow_out_dir, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "id": "55fad822", + "metadata": {}, + "source": [ + "### Download the data\n", + "\n", + "[DataLad](http://handbook.datalad.org/en/latest/index.htmlhttp://handbook.datalad.org/en/latest/index.html) is often used in those cases to download data. Here we use its [Python API](http://docs.datalad.org/en/latest/modref.htmlhttp://docs.datalad.org/en/latest/modref.html).\n", + "\n", + "We need the following data: \n", + "\n", + "1. event information (raw data)\n", + "2. 
preprocessed image data (fmriprep)\n", + "3. masks (fmriprep)\n", + "4. confounds (fmriprep)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99bf091", + "metadata": { + "tags": [ + "remove-output" + ] + }, + "outputs": [], + "source": [ + "fmriprep_path = workflow_dir / '7_glm'/ 'data'\n", + "rawdata_path = workflow_dir / '7_glm' / 'raw_data'\n", + "os.makedirs(fmriprep_path, exist_ok=True)\n", + "os.makedirs(rawdata_path, exist_ok=True)\n", + "# Install datasets to specific datapaths\n", + "fmriprep_url = 'https://github.com/OpenNeuroDerivatives/ds000001-fmriprep.git'\n", + "rawdata_url = 'https://github.com/OpenNeuroDatasets/ds000001.git'\n", + "dl.install(source=rawdata_url, path=rawdata_path)\n", + "dl.install(source=fmriprep_url, path=fmriprep_path)" + ] + }, + { + "cell_type": "markdown", + "id": "67484d6a", + "metadata": {}, + "source": [ + "### Get data for each subject\n", + "\n", + "By `datalad.api.install`, datalad downloads all symlinks without storing the actual data locally. We can then use `datalad.api.get` to get the data we need for our analysis. \n", + "We need to get four types of data from two folders:\n", + "\n", + "1. event_info: `*events.tsv` from `rawdata_path`\n", + "2. bold: `*space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz` from `fmriprep_path`\n", + "3. mask: `*space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz` from `fmriprep_path`\n", + "4. confounds: `*desc-confounds_timeseries.tsv` from `fmriprep_path` (this is implicitly needed by `load_confounds_strategy`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d1fbfa3", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {\n", + " 'subj_id': int,\n", + " 'return': {'subj_id': int, 'subj_events': list, 'subj_imgs':list, 'subj_masks':list},\n", + " }\n", + ")\n", + "def get_subjdata(subj_id):\n", + " print(f\"\\nDownload data for subject-{subj_id}\")\n", + " # get events.tsv \n", + " subj_events = glob.glob(os.path.join(rawdata_path, 'sub-%02d' % subj_id, 'func', '*events.tsv'))\n", + " subj_events.sort()\n", + " for i in subj_events:\n", + " dl.get(i, dataset=rawdata_path)\n", + " # get bold\n", + " subj_imgs = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz'))\n", + " subj_imgs.sort()\n", + " for i in subj_imgs:\n", + " dl.get(i, dataset=fmriprep_path)\n", + " # get mask\n", + " subj_masks = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz'))\n", + " subj_masks.sort()\n", + " for i in subj_masks:\n", + " dl.get(i, dataset=fmriprep_path)\n", + " # get confounds list\n", + " subj_confounds = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*desc-confounds_timeseries.tsv'))\n", + " subj_confounds.sort()\n", + " for i in subj_confounds:\n", + " dl.get(i, dataset=fmriprep_path)\n", + " return subj_id, subj_events, subj_imgs, subj_masks" + ] + }, + { + "cell_type": "markdown", + "id": "ef024e95", + "metadata": {}, + "source": [ + "## First-Level GLM\n", + "\n", + "The first level GLM has two parts:\n", + "- conduct GLM for each run on every subject\n", + "- average across runs for each subject with a fixed-effect model" + ] + }, + { + "cell_type": "markdown", + "id": "4c652b28", + "metadata": {}, + "source": [ + "### Get the first-level design matrix\n", + "\n", + "The design matrix is a _M(row)_ x _N(columns)_ matrix. 
_M_ corresponds to the number of _tr_, while _N_ corresponds to event conditions + confounds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83b52e93", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {\n", + " 'tr': float,\n", + " 'n_scans': int,\n", + " 'hrf_model': str,\n", + " 'subj_id': int,\n", + " 'run_id': int,\n", + " 'subj_imgs': list,\n", + " 'subj_events':list,\n", + " 'return': {'dm_path': str, 'run_id': int},\n", + " }\n", + ")\n", + "def get_firstlevel_dm(tr, n_scans, hrf_model, subj_id, run_id, subj_imgs, subj_events):\n", + " print(f\"\\nGet subject-{subj_id}, run-{run_id} firstlevel GLM design matrix...\\n\")\n", + " # read event file\n", + " run_img = subj_imgs[run_id-1]\n", + " run_event = subj_events[run_id-1]\n", + " event = pd.read_csv(run_event, sep='\\t').fillna(0)\n", + " event = event[['onset', 'duration', 'trial_type']]\n", + " # get list of confounds directly from fmriprepped bold\n", + " confounds = load_confounds_strategy(run_img, denoise_strategy='simple')[0]\n", + " frame_times = np.arange(n_scans) * tr\n", + " design_matrix = make_first_level_design_matrix(frame_times, event, \n", + " hrf_model=hrf_model,\n", + " add_regs=confounds) \n", + "\n", + " # make sure all design matrices have the same length of column\n", + " # if you have a block design, this is not needed.\n", + " # 39 = 4(events) + 34(confounds) + 13(drift) + 1(constant)\n", + " assert design_matrix.shape[1] == 52, \"This design matrix has the wrong column number\"\n", + " # sort the column order alphabetical for contrasts\n", + " design_matrix = design_matrix.reindex(sorted(design_matrix.columns), axis=1)\n", + " dm_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_designmatrix.csv' % (subj_id, run_id))\n", + " design_matrix.to_csv(dm_path, index=None)\n", + " return dm_path, run_id" + ] + }, + { + "cell_type": "markdown", + "id": "b9d8c639", + "metadata": {}, + "source": [ + "### Set up the first level contrasts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "02c14978", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {\n", + " 'subj_id': int,\n", + " 'run_id': int,\n", + " 'dm_path': str,\n", + " 'return': {'contrasts': dict},\n", + " }\n", + ")\n", + "def set_contrast(subj_id, run_id, dm_path):\n", + " print(f\"\\nSet firstlevel contrast for subject-{subj_id}, run-{run_id} ...\\n\") \n", + " design_matrix = pd.read_csv(dm_path)\n", + " contrast_matrix = np.eye(design_matrix.shape[1])\n", + " basic_contrasts = dict([(column, contrast_matrix[i])\n", + " for i, column in enumerate(design_matrix.columns)])\n", + " contrasts = {\n", + " 'pumps-control': basic_contrasts['pumps_demean'] - basic_contrasts['control_pumps_demean'],\n", + " 'control-pumps': -basic_contrasts['control_pumps_demean'] + basic_contrasts['pumps_demean'],\n", + " 'pumps-baseline': basic_contrasts['pumps_demean'],\n", + " 'cash-baseline': basic_contrasts['cash_demean'],\n", + " 'explode-baseline': basic_contrasts['explode_demean']\n", + " }\n", + " return contrasts" + ] + }, + { + "cell_type": "markdown", + "id": "c3842a52", + "metadata": {}, + "source": [ + "### Fit the first level GLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "97c51941", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {\n", + " 'subj_id': int,\n", + " 'run_id': int,\n", + " 'subj_imgs': list,\n", + " 
'subj_masks': list,\n", + " 'smoothing_fwhm': float,\n", + " 'dm_path': str,\n", + " 'contrasts':dict,\n", + " 'return': {'effect_size_path_dict': dict, 'effect_variance_path_dict': dict},\n", + " }\n", + ")\n", + "def firstlevel_estimation(subj_id, run_id, subj_imgs, subj_masks, smoothing_fwhm, dm_path, contrasts):\n", + " print(f\"\\nStart firstlevel estimation for subject-{subj_id}, run-{run_id} ...\\n\")\n", + " \n", + " # subsample img to reduce memory\n", + " run_img = subj_imgs[run_id-1]\n", + " img = load_img(run_img)\n", + " img_data = get_data(run_img)[::2,::2,::2]\n", + " new_img = nib.Nifti1Image(img_data, img.affine)\n", + " run_mask = subj_masks[run_id-1]\n", + " print('Fit the firstlevel model...')\n", + " first_level_model = FirstLevelModel(mask_img=run_mask, smoothing_fwhm=smoothing_fwhm)\n", + " dm= pd.read_csv(dm_path)\n", + " first_level_model = first_level_model.fit(new_img, design_matrices=dm)\n", + " print('Computing contrasts...')\n", + " effect_size_path_dict = dict.fromkeys(contrasts.keys())\n", + " effect_variance_path_dict = dict.fromkeys(contrasts.keys())\n", + " for index, (contrast_id, contrast_val) in enumerate(contrasts.items()):\n", + " print(' Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(contrasts), contrast_id))\n", + " # Estimate the contasts. Note that the model implicitly computes a fixed\n", + " # effect across the two sessions\n", + " res = first_level_model.compute_contrast(contrast_val, output_type='all')\n", + " # write the resulting stat images to file\n", + " effect_size_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_contrast-%s_effect_size.nii.gz' % (subj_id, run_id, contrast_id))\n", + " effect_variance_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_contrast-%s_effect_varaince.nii.gz' % (subj_id, run_id, contrast_id))\n", + " effect_size_path_dict[contrast_id] = effect_size_path\n", + " effect_variance_path_dict[contrast_id] = effect_variance_path\n", + " res['effect_size'].to_filename(effect_size_path)\n", + " res['effect_variance'].to_filename(effect_variance_path)\n", + " \n", + " return effect_size_path_dict, effect_variance_path_dict" + ] + }, + { + "cell_type": "markdown", + "id": "1565da8f", + "metadata": {}, + "source": [ + "### Create the first level GLM workflow\n", + "\n", + "This workflow include GLM for each run." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "202433ba", + "metadata": {}, + "outputs": [], + "source": [ + "# initiate the first-level GLM workflow\n", + "wf_firstlevel = Workflow(\n", + " name='wf_firstlevel',\n", + " input_spec=[\n", + " 'subj_id',\n", + " 'run_id',\n", + " 'subj_imgs',\n", + " 'subj_events',\n", + " 'subj_masks',\n", + " 'tr',\n", + " 'n_scans',\n", + " 'hrf_model',\n", + " 'smoothing_fwhm'\n", + " ],\n", + ")\n", + "\n", + "wf_firstlevel.split('run_id', run_id = wf_firstlevel.lzin.run_id)\n", + "# add task - get_firstlevel_dm\n", + "wf_firstlevel.add(\n", + " get_firstlevel_dm(\n", + " name = \"get_firstlevel_dm\",\n", + " tr = wf_firstlevel.lzin.tr, \n", + " n_scans = wf_firstlevel.lzin.n_scans, \n", + " hrf_model = wf_firstlevel.lzin.hrf_model, \n", + " subj_id = wf_firstlevel.lzin.subj_id, \n", + " run_id = wf_firstlevel.lzin.run_id, \n", + " subj_imgs = wf_firstlevel.lzin.subj_imgs, \n", + " subj_events = wf_firstlevel.lzin.subj_events,\n", + " )\n", + ")\n", + "\n", + "# add task - set_contrast\n", + "wf_firstlevel.add(\n", + " set_contrast(\n", + " name = \"set_contrast\",\n", + " subj_id = wf_firstlevel.lzin.subj_id,\n", + " run_id = wf_firstlevel.get_firstlevel_dm.lzout.run_id,\n", + " dm_path = wf_firstlevel.get_firstlevel_dm.lzout.dm_path\n", + " )\n", + ")\n", + "\n", + "# add task - firstlevel_estimation\n", + "wf_firstlevel.add(\n", + " firstlevel_estimation(\n", + " name = \"firstlevel_estimation\",\n", + " subj_id = wf_firstlevel.lzin.subj_id, \n", + " run_id = wf_firstlevel.get_firstlevel_dm.lzout.run_id, \n", + " subj_imgs = wf_firstlevel.lzin.subj_imgs, \n", + " subj_masks = wf_firstlevel.lzin.subj_masks,\n", + " smoothing_fwhm = wf_firstlevel.lzin.smoothing_fwhm, \n", + " dm_path = wf_firstlevel.get_firstlevel_dm.lzout.dm_path, \n", + " contrasts = wf_firstlevel.set_contrast.lzout.contrasts\n", + " )\n", + ")\n", + "\n", + "\n", + "wf_firstlevel.combine('run_id')\n", + "# specify output\n", + "wf_firstlevel.set_output(\n", + " [\n", + " ('first_level_contrast', wf_firstlevel.set_contrast.lzout.contrasts),\n", + " ('first_level_effect_size_list', wf_firstlevel.firstlevel_estimation.lzout.effect_size_path_dict),\n", + " ('first_level_effect_variance_list', wf_firstlevel.firstlevel_estimation.lzout.effect_variance_path_dict),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "db735ca0", + "metadata": {}, + "source": [ + "### Compute fixed effects\n", + "\n", + "Before we move to the second(group) level, we need to average results from all three runs from a fixed effect model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e42da7a", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'subj_id': int, \n", + " 'subj_masks': list,\n", + " 'contrasts': list,\n", + " 'effect_size_path_dict_list': list,\n", + " 'effect_variance_path_dict_list': list,\n", + " 'return': {'fixed_fx_contrast_path_dict': dict, 'fixed_fx_variance_path_dict': dict, 'fixed_fx_ttest_path_dict': dict},\n", + " }\n", + ")\n", + "def get_fixed_effcts(subj_id, subj_masks, contrasts, effect_size_path_dict_list, effect_variance_path_dict_list):\n", + " print(f\"contrast:{contrast}\")\n", + " print(f'Compute fixed effects for subject-{subj_id}...')\n", + " # average mask across three runs\n", + " mean_mask = math_img('np.mean(img, axis=-1)', img=subj_masks)\n", + " # binarize the mean mask\n", + " mask = math_img('img > 0', img=mean_mask)\n", + " 
fixed_fx_contrast_path_dict =dict.fromkeys(contrasts[0].keys())\n", + " fixed_fx_variance_path_dict = dict.fromkeys(contrasts[0].keys())\n", + " fixed_fx_ttest_path_dict = dict.fromkeys(contrasts[0].keys())\n", + " for index, (contrast_id, contrast_val) in enumerate(contrasts[0].items()):\n", + " print(' Contrast % 2i out of %i: %s' % (index + 1, len(contrasts[0]), contrast_id))\n", + " contrast_imgs = [nib.load(img_dict[contrast_id]) for img_dict in effect_size_path_dict_list]\n", + " variance_imgs = [nib.load(img_dict[contrast_id]) for img_dict in effect_variance_path_dict_list]\n", + " fixed_fx_contrast, fixed_fx_variance, fixed_fx_ttest = compute_fixed_effects(contrast_imgs, variance_imgs, mask)\n", + " \n", + " effect_size_path = os.path.join(workflow_out_dir, 'sub-%s_contrast-%s_fx_effect_size.nii.gz' % (subj_id, contrast_id))\n", + " variance_path = os.path.join(workflow_out_dir, 'sub-%s_contrast-%s_fx_effect_varaince.nii.gz' % (subj_id, contrast_id))\n", + " ttest_path = os.path.join(workflow_out_dir, 'sub-%s_contrast-%s_ttest_map.nii.gz' % (subj_id, contrast_id))\n", + " fixed_fx_contrast_path_dict[contrast_id] = effect_size_path\n", + " fixed_fx_variance_path_dict[contrast_id] = variance_path\n", + " fixed_fx_ttest_path_dict[contrast_id] = ttest_path\n", + " \n", + " fixed_fx_contrast.to_filename(effect_size_path)\n", + " fixed_fx_variance.to_filename(variance_path)\n", + " fixed_fx_ttest.to_filename(ttest_path)\n", + " return fixed_fx_contrast_path_dict, fixed_fx_variance_path_dict, fixed_fx_ttest_path_dict" + ] + }, + { + "cell_type": "markdown", + "id": "b9210376", + "metadata": {}, + "source": [ + "### Create the fixed effect workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f827e8c", + "metadata": {}, + "outputs": [], + "source": [ + "# initiate the fixed effect GLM workflow\n", + "wf_fixed_effect = Workflow(\n", + " name='wf_fixed_effect',\n", + " input_spec=[\n", + " 'subj_id',\n", + " 'run_id',\n", + " 'tr',\n", + " 'n_scans',\n", + " 'hrf_model',\n", + " 'smoothing_fwhm'\n", + " ],\n", + ")\n", + "\n", + "wf_fixed_effect.split('subj_id', subj_id = wf_fixed_effect.lzin.subj_id)\n", + "# add task - get_subj_file\n", + "wf_fixed_effect.add(\n", + " get_subjdata(\n", + " name = \"get_subjdata\",\n", + " subj_id = wf_fixed_effect.lzin.subj_id, \n", + " )\n", + ")\n", + "wf_firstlevel.inputs.subj_id = wf_fixed_effect.get_subjdata.lzout.subj_id\n", + "wf_firstlevel.inputs.run_id = wf_fixed_effect.lzin.run_id\n", + "wf_firstlevel.inputs.tr = wf_fixed_effect.lzin.tr\n", + "wf_firstlevel.inputs.n_scans = wf_fixed_effect.lzin.n_scans\n", + "wf_firstlevel.inputs.hrf_model = wf_fixed_effect.lzin.hrf_model\n", + "wf_firstlevel.inputs.smoothing_fwhm = wf_fixed_effect.lzin.smoothing_fwhm\n", + "wf_firstlevel.inputs.subj_imgs = wf_fixed_effect.get_subjdata.lzout.subj_imgs\n", + "wf_firstlevel.inputs.subj_events = wf_fixed_effect.get_subjdata.lzout.subj_events\n", + "wf_firstlevel.inputs.subj_masks = wf_fixed_effect.get_subjdata.lzout.subj_masks\n", + "wf_fixed_effect.add(wf_firstlevel)\n", + "\n", + "wf_fixed_effect.add(\n", + " get_fixed_effcts(\n", + " name = \"get_fixed_effcts\",\n", + " subj_id = wf_fixed_effect.get_subjdata.lzout.subj_id, \n", + " subj_masks = wf_fixed_effect.get_subjdata.lzout.subj_masks,\n", + " contrasts = wf_fixed_effect.wf_firstlevel.lzout.first_level_contrast, \n", + " effect_size_path_dict_list = wf_fixed_effect.wf_firstlevel.lzout.first_level_effect_size_list, \n", + " effect_variance_path_dict_list = 
wf_fixed_effect.wf_firstlevel.lzout.first_level_effect_variance_list\n", + " )\n", + ")\n", + "\n", + "wf_fixed_effect.combine('subj_id')\n", + "# specify output\n", + "wf_fixed_effect.set_output(\n", + " [\n", + " ('first_level_contrast', wf_fixed_effect.wf_firstlevel.lzout.first_level_contrast),\n", + " ('fx_effect_size_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_contrast_path_dict),\n", + " ('fx_effect_variance_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_variance_path_dict),\n", + " ('fx_t_test_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_ttest_path_dict),\n", + " ]\n", + ")\n", + "\n", + "print(wf_fixed_effect.lzout.first_level_contrast)" + ] + }, + { + "cell_type": "markdown", + "id": "8cef3b93", + "metadata": {}, + "source": [ + "## Second-Level GLM\n", + "\n", + "The second level GLM, as known as the group level, averages results across subjects, containing the following steps:\n", + "- construct design matrix\n", + "- fit the second-level GLM\n", + "- statistical testing" + ] + }, + { + "cell_type": "markdown", + "id": "594069ce", + "metadata": {}, + "source": [ + "### Get the second level design matrix\n", + "\n", + "This is a one-group design. So we need a design matrix for a one-sample test.\n", + "\n", + "The design matrix is a single column of ones, corresponding to the model intercept." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39b10152", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'n_subj': int, 'return': {'design_matrix': ty.Any}}\n", + ")\n", + "def get_secondlevel_dm(n_subj):\n", + " t1 = datetime.datetime.now()\n", + " print(f\"\\nGet secondlevel design matrix ...\\n\")\n", + " design_matrix = pd.DataFrame([1] * n_subj,columns=['intercept'])\n", + " return design_matrix" + ] + }, + { + "cell_type": "markdown", + "id": "38be698e", + "metadata": {}, + "source": [ + "### Fit the second level GLM\n", + "\n", + "Here, we use the list of FirstLevel z-maps as the input for the SecondLevelModel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "336cb11a", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'firstlevel_stats_list': list, 'design_matrix': ty.Any, 'firstlevel_contrast':list, \n", + " 'return': {'secondlevel_mask': ty.Any, 'stat_maps_dict': dict}}\n", + ")\n", + "def secondlevel_estimation(firstlevel_stats_list, design_matrix, firstlevel_contrast): \n", + " print(f\"\\nStart secondlevel estimation ...\\n\")\n", + " stat_maps_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", + " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", + " print(' Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(firstlevel_contrast[0][0]), contrast_id))\n", + " second_level_input = [nib.load(stats_dict[contrast_id]) for stats_dict in firstlevel_stats_list]\n", + " second_level_model = SecondLevelModel()\n", + " second_level_model = second_level_model.fit(second_level_input, design_matrix=design_matrix)\n", + " secondlevel_mask = second_level_model.masker_.mask_img_\n", + " \n", + " stats = second_level_model.compute_contrast(output_type='all')\n", + " # write the resulting stat images to file\n", + " z_image_path = os.path.join(workflow_out_dir, 'secondlevel_contrast-%s_z_map.nii.gz' % contrast_id)\n", + " stat_maps_dict[contrast_id] = stats\n", + " stats['z_score'].to_filename(z_image_path)\n", + " plot_path = os.path.join(workflow_out_dir, 'secondlevel_unthresholded_contrast-%s_zmap.jpg' % contrast_id)\n", + " plot_glass_brain(stats['z_score'],\n", + " colorbar=True,\n", + " threshold=norm.isf(0.001),\n", + " title='Unthresholded z map',\n", + " output_file=plot_path)\n", + " return secondlevel_mask, stat_maps_dict" + ] + }, + { + "cell_type": "markdown", + "id": "c2c8e7d2", + "metadata": {}, + "source": [ + "### Create the second level GLM workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e72933d", + "metadata": {}, + "outputs": [], + "source": [ + "# initiate the first-level GLM workflow\n", + "wf_secondlevel = Workflow(\n", + " name='wf_secondlevel',\n", + " input_spec=[\n", + " 'n_subj',\n", + " 'firstlevel_stats_list', \n", + " 'firstlevel_contrast',\n", + " 'n_perm',\n", + " ],\n", + ")\n", + "\n", + "# add task - get_secondlevel_dm\n", + "wf_secondlevel.add(\n", + " get_secondlevel_dm(\n", + " name = \"get_secondlevel_dm\",\n", + " n_subj = wf_secondlevel.lzin.n_subj, \n", + " )\n", + ")\n", + "\n", + "# add task - secondlevel_estimation\n", + "wf_secondlevel.add(\n", + " secondlevel_estimation(\n", + " name = \"secondlevel_estimation\",\n", + " firstlevel_stats_list = wf_secondlevel.lzin.firstlevel_stats_list, \n", + " design_matrix = wf_secondlevel.get_secondlevel_dm.lzout.design_matrix, \n", + " firstlevel_contrast = wf_secondlevel.lzin.firstlevel_contrast\n", + " )\n", + ")\n", + "\n", + "# specify output\n", + "wf_secondlevel.set_output(\n", + " [\n", + " ('second_level_designmatrix', wf_secondlevel.get_secondlevel_dm.lzout.design_matrix),\n", + " ('second_level_mask', wf_secondlevel.secondlevel_estimation.lzout.secondlevel_mask),\n", + " ('second_level_stats_map', wf_secondlevel.secondlevel_estimation.lzout.stat_maps_dict)\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e3992216", + "metadata": {}, + "source": [ + "## Statistical Testing\n", + "\n", + "In this section, we present different ways of doing statistical testing\n", + "\n", + "1. 
Cluster-thresholding without multiple comparison\n", + "2. Multiple comparison using FDR\n", + "3. Parametric testing\n", + "4. NonParametric testing" + ] + }, + { + "cell_type": "markdown", + "id": "81eecd78", + "metadata": {}, + "source": [ + "### Cluster-thresholding and Plot without multiple comparison\n", + "\n", + "Threshold the resulting map without multiple comparisons correction, abs(z) > 3.29 (equivalent to p < 0.001), cluster size > 10 voxels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b566f1d3", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'stat_maps_dict': dict, 'threshold': float, 'cluster_threshold': int, \n", + " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", + ")\n", + "def cluster_thresholding(stat_maps_dict, threshold, cluster_threshold):\n", + " t1 = datetime.datetime.now()\n", + " print(\"\\nStart cluster thresholding ...\\n\")\n", + " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", + " print('Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(stat_maps_dict), stats_id))\n", + " thresholded_map = threshold_img(\n", + " img = stats_val['z_score'],\n", + " threshold=threshold,\n", + " cluster_threshold=cluster_threshold,\n", + " two_sided=True,\n", + " )\n", + " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_cluster_thresholded_contrast-%s_z_map.nii.gz' % stats_id)\n", + " thresholded_map_dict[stats_id] = thresholded_map_path\n", + " thresholded_map.to_filename(thresholded_map_path)\n", + " plot_path = os.path.join(workflow_out_dir, \n", + " 'secondlevel_cluster_thresholded_contrast-%s_zmap.jpg' % stats_id)\n", + " plot_contrast_dict[stats_id] = plot_path\n", + " plot_stat_map(thresholded_map,\n", + " title='Cluster Thresholded z map',\n", + " output_file=plot_path)\n", + " print(\"\\nCluster thresholding is done\")\n", + " return thresholded_map_dict, plot_contrast_dict" + ] + }, + { + "cell_type": "markdown", + "id": "7c830b46", + "metadata": {}, + "source": [ + "### Multiple comparison and Plot\n", + "\n", + "We have the following choices:\n", + "- `fdr`: False Discovery Rate (FDR <.05) and no cluster-level threshold\n", + "- `fpr`: False Positive Rate\n", + "- `bonferroni`\n", + "\n", + "More details see [here](https://nilearn.github.io/stable/modules/generated/nilearn.glm.threshold_stats_img.html#nilearn.glm.threshold_stats_img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce280b52", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'stat_maps_dict': dict, 'alpha': float, 'height_control': str, \n", + " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", + ")\n", + "def multiple_comparison(stat_maps_dict, alpha, height_control):\n", + " print(\"\\nStart multiple comparison ...\\n\")\n", + " from nilearn.glm import threshold_stats_img\n", + " from nilearn.plotting import plot_stat_map\n", + " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", + " print('Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(stat_maps_dict), stats_id))\n", + " thresholded_map, threshold = threshold_stats_img(\n", + " 
stat_img=stats_val['z_score'], \n", + " alpha=alpha, \n", + " height_control=height_control)\n", + " thresholded_map_path = os.path.join(workflow_out_dir, \n", + " 'secondlevel_multiple_comp_corrected_contrast-%s_z_map.nii.gz' % stats_id)\n", + " thresholded_map_dict[stats_id] = thresholded_map_path\n", + " thresholded_map.to_filename(thresholded_map_path)\n", + " plot_path = os.path.join(workflow_out_dir, \n", + " 'secondlevel_multiple_comp_corrected_contrast-%s_zmap.jpg' % stats_id)\n", + " plot_contrast_dict[stats_id] = plot_path\n", + " plot_stat_map(thresholded_map,\n", + " title='Thresholded z map, expected fdr = .05',\n", + " threshold=threshold, \n", + " output_file=plot_path)\n", + " print(\"\\nMultiple comparison is done\")\n", + " return thresholded_map_dict, plot_contrast_dict" + ] + }, + { + "cell_type": "markdown", + "id": "f75d4d17", + "metadata": {}, + "source": [ + "### Parametric test & Plot\n", + "\n", + "We threshold the second level contrast at uncorrected p < 0.001.\n", + "\n", + "A nilearn example see [here](https://nilearn.github.io/dev/auto_examples/05_glm_second_level/plot_second_level_one_sample_test.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bd90cb2", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'stat_maps_dict': dict, \n", + " 'secondlevel_mask': ty.Any,\n", + " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", + ")\n", + "def parametric_test(stat_maps_dict, secondlevel_mask):\n", + " print(\"\\nStart parametric test ...\\n\")\n", + " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", + " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", + " print('Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(stat_maps_dict), stats_id))\n", + " p_val = stats_val['p_value']\n", + " n_voxels = np.sum(get_data(img=secondlevel_mask))\n", + " # Correcting the p-values for multiple testing and taking negative logarithm\n", + " neg_log_pval = math_img(\"-np.log10(np.minimum(1, img * {}))\"\n", + " .format(str(n_voxels)),\n", + " img=p_val)\n", + " \n", + " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_Parametric_thresholded_contrast-%s_z_map.nii.gz' % stats_id)\n", + " thresholded_map_dict[stats_id] = thresholded_map_path\n", + " neg_log_pval.to_filename(thresholded_map_path)\n", + " \n", + " # Since we are plotting negative log p-values and using a threshold equal to 1,\n", + " # it corresponds to corrected p-values lower than 10%, meaning that there is\n", + " # less than 10% probability to make a single false discovery (90% chance that\n", + " # we make no false discovery at all). This threshold is much more conservative\n", + " # than the previous one.\n", + " title = ('parametric test (FWER < 10%)')\n", + " plot_path = os.path.join(workflow_out_dir, \n", + " 'secondlevel_Parametric_thresholded_contrast-%s_zmap.jpg' % stats_id)\n", + " plot_contrast_dict[stats_id] = plot_path\n", + " plot_stat_map(\n", + " neg_log_pval, colorbar=True,\n", + " title=title, output_file=plot_path)\n", + " print(\"\\nParametric test is done\")\n", + " return thresholded_map_dict, plot_contrast_dict" + ] + }, + { + "cell_type": "markdown", + "id": "37358f38", + "metadata": {}, + "source": [ + "### Non-Parametric test & Plot\n", + "\n", + "Here we compute the (corrected) negative log p-values with permutation test." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5482dcd1", + "metadata": {}, + "outputs": [], + "source": [ + "@pydra.mark.task\n", + "@pydra.mark.annotate(\n", + " {'firstlevel_stats_list': list, 'smoothing_fwhm':float,'design_matrix': ty.Any, 'firstlevel_contrast': list, 'n_perm': int, \n", + " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", + ")\n", + "def nonparametric_test(firstlevel_stats_list, smoothing_fwhm, design_matrix, firstlevel_contrast, n_perm):\n", + " print(f\"\\nStart nonparametric test ...\\n\")\n", + " thresholded_map_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", + " plot_contrast_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", + " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", + " print(' Contrast % 2i out of %i: %s' % (\n", + " index + 1, len(firstlevel_contrast[0][0]), contrast_id))\n", + " # here we set threshold as none to do voxel-level FWER-correction.\n", + " second_level_input = [nib.load(stats_dict[contrast_id]) for stats_dict in firstlevel_stats_list]\n", + " neg_log_pvals_permuted_ols_unmasked = \\\n", + " non_parametric_inference(second_level_input=second_level_input, design_matrix=design_matrix,\n", + " model_intercept=True, n_perm=n_perm,\n", + " two_sided_test=False, smoothing_fwhm=smoothing_fwhm, n_jobs=1)\n", + " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_permutation_contrast-%s_z_map.nii.gz' % contrast_id)\n", + " thresholded_map_dict[contrast_id] = thresholded_map_path\n", + " neg_log_pvals_permuted_ols_unmasked.to_filename(thresholded_map_path)\n", + " # here I actually have more than one contrast\n", + " title = ('permutation test (FWER < 10%)')\n", + " plot_path = os.path.join(workflow_out_dir, 'secondlevel_permutation_contrast-%s_zmap.jpg' % contrast_id)\n", + " plot_contrast_dict[contrast_id] = plot_path\n", + " plot_stat_map(\n", + " neg_log_pvals_permuted_ols_unmasked, colorbar=True, \n", + " title=title, output_file=plot_path)\n", + " print(\"\\nPermutation is done\")\n", + " return thresholded_map_dict, plot_contrast_dict" + ] + }, + { + "cell_type": "markdown", + "id": "54c2201a", + "metadata": {}, + "source": [ + "## The Ultimate Workflow\n", + "\n", + "Now, let's connect all tasks and workflows together.\n", + "\n", + "Here we randomly choose **5** subjects to perform the analysis. \n", + "\n", + "For computational time, we set `n_perm=100`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e40304a3", + "metadata": {}, + "outputs": [], + "source": [ + "wf = Workflow(\n", + " name='twolevel_glm',\n", + " input_spec=['n_subj'],\n", + ")\n", + "\n", + "wf.inputs.n_subj = 2\n", + "\n", + "# randomly choose subjects\n", + "wf_fixed_effect.inputs.subj_id = random.sample(range(1,17), wf.inputs.n_subj)\n", + "wf_fixed_effect.inputs.run_id =[1,2]\n", + "wf_fixed_effect.inputs.tr = 2.3\n", + "wf_fixed_effect.inputs.n_scans = 300\n", + "wf_fixed_effect.inputs.hrf_model = 'glover'\n", + "wf_fixed_effect.inputs.smoothing_fwhm = 5.0\n", + "wf.add(wf_fixed_effect)\n", + "\n", + "wf_secondlevel.inputs.n_subj = wf.inputs.n_subj\n", + "wf_secondlevel.inputs.firstlevel_stats_list = wf.wf_fixed_effect.lzout.fx_t_test_list \n", + "wf_secondlevel.inputs.firstlevel_contrast = wf.wf_fixed_effect.lzout.first_level_contrast\n", + "wf.add(wf_secondlevel)\n", + "\n", + "# add task - cluster_thresholding\n", + "wf.add(\n", + " cluster_thresholding(\n", + " name = \"cluster_thresholding\",\n", + " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", + " threshold = 3.29, \n", + " cluster_threshold = 10\n", + " )\n", + ")\n", + "\n", + "\n", + "# add task - multiple_comparison\n", + "wf.add(\n", + " multiple_comparison(\n", + " name = \"multiple_comparison\",\n", + " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", + " alpha = 0.05,\n", + " height_control = 'fdr'\n", + " )\n", + ")\n", + "\n", + "# add task - parametric_test\n", + "wf.add(\n", + " parametric_test(\n", + " name = \"parametric_test\",\n", + " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", + " secondlevel_mask = wf.wf_secondlevel.lzout.second_level_mask\n", + " )\n", + " \n", + ")\n", + "\n", + "# add task - nonparametric_test\n", + "wf.add(\n", + " nonparametric_test(\n", + " name = \"nonparametric_test\",\n", + " firstlevel_stats_list = wf.wf_fixed_effect.lzout.fx_t_test_list,\n", + " smoothing_fwhm = 5.0,\n", + " design_matrix = wf.wf_secondlevel.lzout.second_level_designmatrix,\n", + " firstlevel_contrast = wf.wf_fixed_effect.lzout.first_level_contrast,\n", + " n_perm = 100,\n", + " )\n", + ")\n", + "\n", + "wf.set_output(\n", + " [\n", + " ('second_level_stats_map', wf.wf_secondlevel.lzout.second_level_stats_map) \n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "27ce8b99", + "metadata": {}, + "source": [ + "### Run Workflow Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "487fc005", + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "from pydra import Submitter\n", + "\n", + "with Submitter(plugin='cf', n_procs=1) as submitter:\n", + " submitter(wf)\n", + "\n", + "results = wf.result()\n", + "\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "id": "57e9a1a1", + "metadata": {}, + "source": [ + "## Let's Plot!\n", + "\n", + "We only use 5 subjects, so it's reasonable the following plots have nothing survived from testing." + ] + }, + { + "cell_type": "markdown", + "id": "a9a671d3", + "metadata": {}, + "source": [ + "### Unthresholded\n", + "\n", + "Let's plot the unthresholded image first." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f314c3", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "ut_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_unthresholded*.jpg\"))\n", + "Image(filename=ut_list[0])" + ] + }, + { + "cell_type": "markdown", + "id": "e87582eb", + "metadata": {}, + "source": [ + "### Cluster Thresholding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "92797899", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "from IPython.display import Image\n", + "ct_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_cluster_thresholded*.jpg\"))\n", + "Image(filename=ct_list[0])" + ] + }, + { + "cell_type": "markdown", + "id": "a6b6f0ac", + "metadata": {}, + "source": [ + "### Multiple Comparison" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "270bf2b9", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "mc_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_multiple_comp*.jpg\"))\n", + "Image(filename=mc_list[0])" + ] + }, + { + "cell_type": "markdown", + "id": "1a106690", + "metadata": {}, + "source": [ + "### Parametric Test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34d1d04f", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "pt_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_Parametric*.jpg\"))\n", + "Image(filename=pt_list[0])" + ] + }, + { + "cell_type": "markdown", + "id": "b4710f5d", + "metadata": {}, + "source": [ + "### NonParametric Test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1f354cc", + "metadata": { + "tags": [ + "hide-input" + ] + }, + "outputs": [], + "source": [ + "npt_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_permutation*.jpg\"))\n", + "Image(filename=npt_list[0])" + ] + }, + { + "cell_type": "markdown", + "id": "47100abc", + "metadata": {}, + "source": [ + "## Exercise #1" + ] + }, + { + "cell_type": "markdown", + "id": "900179cf", + "metadata": {}, + "source": [ + "In this example, we conducted GLM on each run per subject separately and then used a fixed-effect model to average across runs. \n", + "\n", + "Where did we put `.splitter` and `.combiner`. Why did we put it there?" + ] + }, + { + "cell_type": "markdown", + "id": "20c26eac", + "metadata": {}, + "source": [ + "## Exercise #2" + ] + }, + { + "cell_type": "markdown", + "id": "a3193098", + "metadata": {}, + "source": [ + "Moreover, We choose this approach due to limited memory on GitHub. [FirstLevelModel](https://nilearn.github.io/stable/modules/generated/nilearn.glm.first_level.FirstLevelModel.html) in Nilearn also allows to compute multiple runs with a fixed-effect model simultaneously. Here is an [example](https://nilearn.github.io/stable/auto_examples/04_glm_first_level/plot_fiac_analysis.html#sphx-glr-auto-examples-04-glm-first-level-plot-fiac-analysis-py). \n", + "\n", + "Would you like to give it a try on your own?" 
+ ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,md:myst" + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index ca804f0839..75f2223241 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -96,6 +96,8 @@ Indices and tables :hidden: examples/t1w-preprocess + examples/first-level-glm + examples/two-level-glm .. toctree:: :maxdepth: 2 diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 4ae22d0ed4..1e91d0960a 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -6,18 +6,15 @@ "source": [ "# Getting started\n", "\n", + "## Running your first task\n", + "\n", "The basic runnable component of Pydra is a *task*. Tasks are conceptually similar to\n", - "functions, in that they take inputs, operate on them and then return results. However,\n", + "functions, in that they take inputs, process them and then return results. However,\n", "unlike functions, tasks are parameterised before they are executed in a separate step.\n", "This enables parameterised tasks to be linked together into workflows that are checked for\n", "errors before they are executed, and modular execution workers and environments to specified\n", "independently of the task being performed.\n", "\n", - "Tasks can encapsulate Python functions, shell-commands or workflows constructed from\n", - "task components.\n", - "\n", - "## Running your first task\n", - "\n", "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). Pre-define task definitions are run by\n", "\n", @@ -25,12 +22,12 @@ "* instantiate the class with the parameters of the task\n", "* \"call\" resulting object to execute it as you would a function (i.e. 
with the `my_task(...)`)\n", "\n", - "To demonstrate with a toy example of loading a JSON file with the `pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" + "To demonstrate with a toy example, of loading a JSON file with the `pydra.tasks.common.LoadJson` task, this we first create an example JSON file" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -38,7 +35,7 @@ "from tempfile import mkdtemp\n", "import json\n", "\n", - "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", + "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n", "\n", "test_dir = Path(mkdtemp())\n", "json_file = test_dir / \"test.json\"\n", @@ -73,19 +70,6 @@ "assert result.output.out == JSON_CONTENTS" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `Result` object contains\n", - "\n", - "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", - "* `runtime`: information about the peak memory and CPU usage\n", - "* `errored`: the error status of the task\n", - "* `task`: the task object that generated the results\n", - "* `output_dir`: the output directory the results are stored in" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -159,14 +143,14 @@ "# as the number of NIfTI files\n", "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", "\n", - "mrgrid_varying_vox_sizes = MrGrid().split(\n", + "mrgrid_varying_sizes = MrGrid().split(\n", " (\"input\", \"voxel\"),\n", " input=nifti_dir.iterdir(),\n", " voxel=VOXEL_SIZES\n", ")\n", "\n", "# Run the task to resample all NIfTI files with different voxel sizes\n", - "result = mrgrid_varying_vox_sizes(cache_dir=test_dir / \"cache\")" + "result = mrgrid()" ] }, { @@ -175,84 +159,7 @@ "source": [ "## Cache directories\n", "\n", - "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", - "task and the operation to be performed. This hash is used to name the output directory for\n", - "the task within the specified cache directory. Therefore, if you use the same cache\n", - "directory between runs and in a subsequent run the same task is executed with the same\n", - "inputs then the location of its output directory will also be the same, and the outputs\n", - "generated by the previous run are reused." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "result1 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result1.output_dir == result.output_dir\n", - "\n", - "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", - "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "result2 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", - "\n", - "# The output directory will be different as the inputs are now different\n", - "assert result2.output_dir != result.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for file objects, the contents of the files are used to calculate the hash\n", - "not their paths. Therefore, when inputting large files there might be some additional\n", - "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", - "shouldn't need to be recalculated unless they are modified). However, this makes the\n", - "hashes invariant to file-system movement. For example, changing the name of one of the\n", - "files in the nifti directory won't invalidate the hash." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", - "\n", - "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "result3 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result3.output_dir == result.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Debugging\n", - "\n", - "Work in progress..." + "When a task runs, a hash is generated by the combination of all the inputs to the task and the task to be run." ] }, { diff --git a/pyproject.toml b/pyproject.toml index 4b2b4af53d..5a6a96c414 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,4 +100,4 @@ target-version = ['py38'] exclude = "pydra/engine/_version.py" [tool.codespell] -ignore-words-list = "nd,afile,inpt" +ignore-words-list = "nd,afile,inpt,fpr" From 7a7cffd17e639dc6f46542d300c76853e17b1499 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 23:26:42 +1100 Subject: [PATCH 112/342] touched up errors in first-level-glm --- new-docs/source/conf.py | 3 +++ new-docs/source/examples/first-level-glm.ipynb | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py index d5235029a1..3282fb2e66 100644 --- a/new-docs/source/conf.py +++ b/new-docs/source/conf.py @@ -49,6 +49,9 @@ "numpydoc", ] + +nbsphinx_allow_errors = True + # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] diff --git a/new-docs/source/examples/first-level-glm.ipynb b/new-docs/source/examples/first-level-glm.ipynb index a99dad8458..aecba6e5dd 100644 --- a/new-docs/source/examples/first-level-glm.ipynb +++ b/new-docs/source/examples/first-level-glm.ipynb @@ -57,7 +57,7 @@ "\n", "from pydra.design import python, workflow\n", "from fileformats.generic import File, Directory\n", - "from fileforamts.text import Csv\n", + "from fileformats.text import Csv\n", "import pandas as pd\n", "from scipy.stats import norm\n", "\n", @@ -586,9 +586,9 @@ "source": [ "wf = FullWorkflow(output_dir=workflow_out_dir, n_subjects=1, contrast='StopSuccess - Go')\n", "\n", - "results = wf(plugin='cf', n_procs=4)\n", - "\n", - "print(results)" + "if False:\n", + " results = wf(plugin='cf', n_procs=4)\n", + " print(results)" ] }, { From edd682fab6e4f47bdfce547b2ab10b2d1c507aaf Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 23:29:54 +1100 Subject: [PATCH 113/342] reinstated changes to getting-started --- .../source/tutorial/getting-started.ipynb | 111 ++++++++++++++++-- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 1e91d0960a..4ae22d0ed4 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -6,15 +6,18 @@ "source": [ "# Getting started\n", "\n", - "## Running your first task\n", - "\n", "The basic runnable component of Pydra is a *task*. Tasks are conceptually similar to\n", - "functions, in that they take inputs, process them and then return results. However,\n", + "functions, in that they take inputs, operate on them and then return results. However,\n", "unlike functions, tasks are parameterised before they are executed in a separate step.\n", "This enables parameterised tasks to be linked together into workflows that are checked for\n", "errors before they are executed, and modular execution workers and environments to specified\n", "independently of the task being performed.\n", "\n", + "Tasks can encapsulate Python functions, shell-commands or workflows constructed from\n", + "task components.\n", + "\n", + "## Running your first task\n", + "\n", "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). Pre-define task definitions are run by\n", "\n", @@ -22,12 +25,12 @@ "* instantiate the class with the parameters of the task\n", "* \"call\" resulting object to execute it as you would a function (i.e. 
with the `my_task(...)`)\n", "\n", - "To demonstrate with a toy example, of loading a JSON file with the `pydra.tasks.common.LoadJson` task, this we first create an example JSON file" + "To demonstrate with a toy example of loading a JSON file with the `pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +38,7 @@ "from tempfile import mkdtemp\n", "import json\n", "\n", - "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.5598136790149003, 6]}\n", + "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", "\n", "test_dir = Path(mkdtemp())\n", "json_file = test_dir / \"test.json\"\n", @@ -70,6 +73,19 @@ "assert result.output.out == JSON_CONTENTS" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -143,14 +159,14 @@ "# as the number of NIfTI files\n", "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", "\n", - "mrgrid_varying_sizes = MrGrid().split(\n", + "mrgrid_varying_vox_sizes = MrGrid().split(\n", " (\"input\", \"voxel\"),\n", " input=nifti_dir.iterdir(),\n", " voxel=VOXEL_SIZES\n", ")\n", "\n", "# Run the task to resample all NIfTI files with different voxel sizes\n", - "result = mrgrid()" + "result = mrgrid_varying_vox_sizes(cache_dir=test_dir / \"cache\")" ] }, { @@ -159,7 +175,84 @@ "source": [ "## Cache directories\n", "\n", - "When a task runs, a hash is generated by the combination of all the inputs to the task and the task to be run." + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the output directory for\n", + "the task within the specified cache directory. Therefore, if you use the same cache\n", + "directory between runs and in a subsequent run the same task is executed with the same\n", + "inputs then the location of its output directory will also be the same, and the outputs\n", + "generated by the previous run are reused." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result1 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result1.output_dir == result.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result2 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result2.output_dir != result.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", + "\n", + "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "result3 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result3.output_dir == result.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Debugging\n", + "\n", + "Work in progress..." ] }, { From f5276b96ec7526ccb06a577661e36201338fb46f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 29 Dec 2024 23:35:25 +1100 Subject: [PATCH 114/342] touched up getting-started --- new-docs/source/tutorial/getting-started.ipynb | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 4ae22d0ed4..b075fd448e 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -19,13 +19,14 @@ "## Running your first task\n", "\n", "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", - "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). Pre-define task definitions are run by\n", + "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). 
To use a pre-defined task definition\n", "\n", - "* importing the class from the `pydra.tasks.*` package it is in\n", - "* instantiate the class with the parameters of the task\n", - "* \"call\" resulting object to execute it as you would a function (i.e. with the `my_task(...)`)\n", + "* import the class from the `pydra.tasks.*` package it is in\n", + "* instantiate it with appropriate parameters\n", + "* \"call\" resulting object (i.e. `my_task(...)`) to execute it as you would a function \n", "\n", - "To demonstrate with a toy example of loading a JSON file with the `pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" + "To demonstrate with a toy example of loading a JSON file with the\n", + "`pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" ] }, { @@ -250,7 +251,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Debugging\n", + "## Debugging failed tasks\n", "\n", "Work in progress..." ] From 8170777acadc1b9031ecd674bdedf5a30a0bd012 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 30 Dec 2024 14:58:47 +1100 Subject: [PATCH 115/342] converted two-level glm to new syntax --- new-docs/source/examples/two-level-glm.ipynb | 554 +++++++++---------- 1 file changed, 260 insertions(+), 294 deletions(-) diff --git a/new-docs/source/examples/two-level-glm.ipynb b/new-docs/source/examples/two-level-glm.ipynb index c21efbb25c..5886d7335f 100644 --- a/new-docs/source/examples/two-level-glm.ipynb +++ b/new-docs/source/examples/two-level-glm.ipynb @@ -54,12 +54,11 @@ "if not sys.warnoptions:\n", " warnings.simplefilter(\"ignore\")\n", "\n", - "import os, glob\n", + "import os\n", + "import glob\n", "import datetime\n", "import random\n", - "import pydra\n", - "from pydra import Workflow\n", - "from pydra.engine.specs import File, MultiInputFile, MultiOutputFile\n", + "from pydra.design import python, workflow\n", "import typing as ty\n", "from pathlib import Path\n", "import datalad.api as dl\n", @@ -156,14 +155,9 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {\n", - " 'subj_id': int,\n", - " 'return': {'subj_id': int, 'subj_events': list, 'subj_imgs':list, 'subj_masks':list},\n", - " }\n", - ")\n", - "def get_subjdata(subj_id):\n", + "\n", + "@python.define(outputs=['subj_id','subj_events', 'subj_imgs', 'subj_masks'])\n", + "def GetSubjData(subj_id: int) -> tuple[int, list, list, list]:\n", " print(f\"\\nDownload data for subject-{subj_id}\")\n", " # get events.tsv \n", " subj_events = glob.glob(os.path.join(rawdata_path, 'sub-%02d' % subj_id, 'func', '*events.tsv'))\n", @@ -217,20 +211,8 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {\n", - " 'tr': float,\n", - " 'n_scans': int,\n", - " 'hrf_model': str,\n", - " 'subj_id': int,\n", - " 'run_id': int,\n", - " 'subj_imgs': list,\n", - " 'subj_events':list,\n", - " 'return': {'dm_path': str, 'run_id': int},\n", - " }\n", - ")\n", - "def get_firstlevel_dm(tr, n_scans, hrf_model, subj_id, run_id, subj_imgs, subj_events):\n", + "@python.define(outputs={'dm_path': str, 'run_id': int})\n", + "def GetFirstLevelDm(tr: float, n_scans: int, hrf_model: str, subj_id: int, run_id: int, subj_imgs: list, subj_events: list):\n", " print(f\"\\nGet subject-{subj_id}, run-{run_id} firstlevel GLM design matrix...\\n\")\n", " # read event file\n", " run_img = subj_imgs[run_id-1]\n", @@ -270,16 +252,8 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - 
"@pydra.mark.annotate(\n", - " {\n", - " 'subj_id': int,\n", - " 'run_id': int,\n", - " 'dm_path': str,\n", - " 'return': {'contrasts': dict},\n", - " }\n", - ")\n", - "def set_contrast(subj_id, run_id, dm_path):\n", + "@python.define(outputs={'contrasts': dict})\n", + "def SetContrast(subj_id: int, run_id: int, dm_path: str):\n", " print(f\"\\nSet firstlevel contrast for subject-{subj_id}, run-{run_id} ...\\n\") \n", " design_matrix = pd.read_csv(dm_path)\n", " contrast_matrix = np.eye(design_matrix.shape[1])\n", @@ -310,20 +284,16 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {\n", - " 'subj_id': int,\n", - " 'run_id': int,\n", - " 'subj_imgs': list,\n", - " 'subj_masks': list,\n", - " 'smoothing_fwhm': float,\n", - " 'dm_path': str,\n", - " 'contrasts':dict,\n", - " 'return': {'effect_size_path_dict': dict, 'effect_variance_path_dict': dict},\n", - " }\n", - ")\n", - "def firstlevel_estimation(subj_id, run_id, subj_imgs, subj_masks, smoothing_fwhm, dm_path, contrasts):\n", + "@python.define(outputs={'effect_size_path_dict': dict, 'effect_variance_path_dict': dict})\n", + "def FirstLevelEstimation(\n", + " subj_id: int,\n", + " run_id: int,\n", + " subj_imgs: list,\n", + " subj_masks: list,\n", + " smoothing_fwhm:float,\n", + " dm_path: str,\n", + " contrasts: dict\n", + "):\n", " print(f\"\\nStart firstlevel estimation for subject-{subj_id}, run-{run_id} ...\\n\")\n", " \n", " # subsample img to reduce memory\n", @@ -374,70 +344,63 @@ "outputs": [], "source": [ "# initiate the first-level GLM workflow\n", - "wf_firstlevel = Workflow(\n", - " name='wf_firstlevel',\n", - " input_spec=[\n", - " 'subj_id',\n", - " 'run_id',\n", - " 'subj_imgs',\n", - " 'subj_events',\n", - " 'subj_masks',\n", - " 'tr',\n", - " 'n_scans',\n", - " 'hrf_model',\n", - " 'smoothing_fwhm'\n", - " ],\n", - ")\n", - "\n", - "wf_firstlevel.split('run_id', run_id = wf_firstlevel.lzin.run_id)\n", - "# add task - get_firstlevel_dm\n", - "wf_firstlevel.add(\n", - " get_firstlevel_dm(\n", - " name = \"get_firstlevel_dm\",\n", - " tr = wf_firstlevel.lzin.tr, \n", - " n_scans = wf_firstlevel.lzin.n_scans, \n", - " hrf_model = wf_firstlevel.lzin.hrf_model, \n", - " subj_id = wf_firstlevel.lzin.subj_id, \n", - " run_id = wf_firstlevel.lzin.run_id, \n", - " subj_imgs = wf_firstlevel.lzin.subj_imgs, \n", - " subj_events = wf_firstlevel.lzin.subj_events,\n", + "@workflow.define(outputs=[\n", + " 'first_level_contrast',\n", + " 'first_level_effect_size_list',\n", + " 'first_level_effect_variance_list'\n", + "])\n", + "def GLMWorkflow(\n", + " subj_id,\n", + " run_id,\n", + " subj_imgs,\n", + " subj_events,\n", + " subj_masks,\n", + " tr,\n", + " n_scans,\n", + " hrf_model,\n", + " smoothing_fwhm\n", + "):\n", + " \n", + " # add task - get_firstlevel_dm\n", + " get_firstlevel_dm = workflow.add(\n", + " GetFirstLevelDm(\n", + " tr = tr, \n", + " n_scans = n_scans, \n", + " hrf_model = hrf_model, \n", + " subj_id = subj_id, \n", + " run_id = run_id, \n", + " subj_imgs = subj_imgs, \n", + " subj_events = subj_events,\n", + " )\n", " )\n", - ")\n", "\n", - "# add task - set_contrast\n", - "wf_firstlevel.add(\n", - " set_contrast(\n", - " name = \"set_contrast\",\n", - " subj_id = wf_firstlevel.lzin.subj_id,\n", - " run_id = wf_firstlevel.get_firstlevel_dm.lzout.run_id,\n", - " dm_path = wf_firstlevel.get_firstlevel_dm.lzout.dm_path\n", + " # add task - set_contrast\n", + " set_contrast = workflow.add(\n", + " SetContrast(\n", + " subj_id = subj_id,\n", + " run_id = 
get_firstlevel_dm.run_id,\n", + " dm_path = get_firstlevel_dm.dm_path\n", + " )\n", " )\n", - ")\n", "\n", - "# add task - firstlevel_estimation\n", - "wf_firstlevel.add(\n", - " firstlevel_estimation(\n", - " name = \"firstlevel_estimation\",\n", - " subj_id = wf_firstlevel.lzin.subj_id, \n", - " run_id = wf_firstlevel.get_firstlevel_dm.lzout.run_id, \n", - " subj_imgs = wf_firstlevel.lzin.subj_imgs, \n", - " subj_masks = wf_firstlevel.lzin.subj_masks,\n", - " smoothing_fwhm = wf_firstlevel.lzin.smoothing_fwhm, \n", - " dm_path = wf_firstlevel.get_firstlevel_dm.lzout.dm_path, \n", - " contrasts = wf_firstlevel.set_contrast.lzout.contrasts\n", + " # add task - firstlevel_estimation\n", + " firstlevel_estimation = workflow.add(\n", + " FirstLevelEstimation(\n", + " subj_id = subj_id, \n", + " run_id = get_firstlevel_dm.run_id, \n", + " subj_imgs = subj_imgs, \n", + " subj_masks = subj_masks,\n", + " smoothing_fwhm = smoothing_fwhm, \n", + " dm_path = get_firstlevel_dm.dm_path, \n", + " contrasts = set_contrast.contrasts\n", + " )\n", " )\n", - ")\n", "\n", - "\n", - "wf_firstlevel.combine('run_id')\n", - "# specify output\n", - "wf_firstlevel.set_output(\n", - " [\n", - " ('first_level_contrast', wf_firstlevel.set_contrast.lzout.contrasts),\n", - " ('first_level_effect_size_list', wf_firstlevel.firstlevel_estimation.lzout.effect_size_path_dict),\n", - " ('first_level_effect_variance_list', wf_firstlevel.firstlevel_estimation.lzout.effect_variance_path_dict),\n", - " ]\n", - ")" + " # specify output\n", + " return (set_contrast.contrasts,\n", + " firstlevel_estimation.effect_size_path_dict,\n", + " firstlevel_estimation.effect_variance_path_dict,\n", + " )" ] }, { @@ -457,18 +420,17 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'subj_id': int, \n", - " 'subj_masks': list,\n", - " 'contrasts': list,\n", - " 'effect_size_path_dict_list': list,\n", - " 'effect_variance_path_dict_list': list,\n", - " 'return': {'fixed_fx_contrast_path_dict': dict, 'fixed_fx_variance_path_dict': dict, 'fixed_fx_ttest_path_dict': dict},\n", - " }\n", + "@python.define(\n", + " outputs={'fixed_fx_contrast_path_dict': dict, 'fixed_fx_variance_path_dict': dict, 'fixed_fx_ttest_path_dict': dict},\n", ")\n", - "def get_fixed_effcts(subj_id, subj_masks, contrasts, effect_size_path_dict_list, effect_variance_path_dict_list):\n", - " print(f\"contrast:{contrast}\")\n", + "def GetFixedEffcts(\n", + " subj_id: int,\n", + " subj_masks: list,\n", + " contrasts: list,\n", + " effect_size_path_dict_list: list,\n", + " effect_variance_path_dict_list: list\n", + "):\n", + " print(f\"contrasts: {contrasts}\")\n", " print(f'Compute fixed effects for subject-{subj_id}...')\n", " # average mask across three runs\n", " mean_mask = math_img('np.mean(img, axis=-1)', img=subj_masks)\n", @@ -512,60 +474,60 @@ "outputs": [], "source": [ "# initiate the fixed effect GLM workflow\n", - "wf_fixed_effect = Workflow(\n", - " name='wf_fixed_effect',\n", - " input_spec=[\n", - " 'subj_id',\n", - " 'run_id',\n", - " 'tr',\n", - " 'n_scans',\n", - " 'hrf_model',\n", - " 'smoothing_fwhm'\n", - " ],\n", + "@workflow.define(\n", + " outputs=['first_level_contrast',\n", + " 'fx_effect_size_list',\n", + " 'fx_effect_variance_list',\n", + " 'fx_t_test_list',\n", + " ]\n", ")\n", + "def FixedEffectWorkflow(\n", + " subj_id,\n", + " run_id,\n", + " tr,\n", + " n_scans,\n", + " hrf_model,\n", + " smoothing_fwhm,\n", + "):\n", "\n", - "wf_fixed_effect.split('subj_id', subj_id = 
wf_fixed_effect.lzin.subj_id)\n", - "# add task - get_subj_file\n", - "wf_fixed_effect.add(\n", - " get_subjdata(\n", - " name = \"get_subjdata\",\n", - " subj_id = wf_fixed_effect.lzin.subj_id, \n", + " # add task - get_subj_file\n", + " get_subjdata = workflow.add(\n", + " GetSubjData(\n", + " subj_id = subj_id, \n", + " )\n", " )\n", - ")\n", - "wf_firstlevel.inputs.subj_id = wf_fixed_effect.get_subjdata.lzout.subj_id\n", - "wf_firstlevel.inputs.run_id = wf_fixed_effect.lzin.run_id\n", - "wf_firstlevel.inputs.tr = wf_fixed_effect.lzin.tr\n", - "wf_firstlevel.inputs.n_scans = wf_fixed_effect.lzin.n_scans\n", - "wf_firstlevel.inputs.hrf_model = wf_fixed_effect.lzin.hrf_model\n", - "wf_firstlevel.inputs.smoothing_fwhm = wf_fixed_effect.lzin.smoothing_fwhm\n", - "wf_firstlevel.inputs.subj_imgs = wf_fixed_effect.get_subjdata.lzout.subj_imgs\n", - "wf_firstlevel.inputs.subj_events = wf_fixed_effect.get_subjdata.lzout.subj_events\n", - "wf_firstlevel.inputs.subj_masks = wf_fixed_effect.get_subjdata.lzout.subj_masks\n", - "wf_fixed_effect.add(wf_firstlevel)\n", "\n", - "wf_fixed_effect.add(\n", - " get_fixed_effcts(\n", - " name = \"get_fixed_effcts\",\n", - " subj_id = wf_fixed_effect.get_subjdata.lzout.subj_id, \n", - " subj_masks = wf_fixed_effect.get_subjdata.lzout.subj_masks,\n", - " contrasts = wf_fixed_effect.wf_firstlevel.lzout.first_level_contrast, \n", - " effect_size_path_dict_list = wf_fixed_effect.wf_firstlevel.lzout.first_level_effect_size_list, \n", - " effect_variance_path_dict_list = wf_fixed_effect.wf_firstlevel.lzout.first_level_effect_variance_list\n", + " wf_firstlevel = workflow.add(\n", + " GLMWorkflow(\n", + " subj_id=get_subjdata.subj_id,\n", + " run_id=run_id,\n", + " tr=tr,\n", + " n_scans=n_scans,\n", + " hrf_model=hrf_model,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " subj_imgs=get_subjdata.subj_imgs,\n", + " subj_events=get_subjdata.subj_events,\n", + " subj_masks=get_subjdata.subj_masks,\n", + " )\n", + " )\n", + " \n", + " get_fixed_effcts = workflow.add(\n", + " GetFixedEffcts(\n", + " subj_id=get_subjdata.subj_id, \n", + " subj_masks=get_subjdata.subj_masks,\n", + " contrasts=wf_firstlevel.first_level_contrast, \n", + " effect_size_path_dict_list=wf_firstlevel.first_level_effect_size_list, \n", + " effect_variance_path_dict_list=wf_firstlevel.first_level_effect_variance_list\n", + " )\n", " )\n", - ")\n", - "\n", - "wf_fixed_effect.combine('subj_id')\n", - "# specify output\n", - "wf_fixed_effect.set_output(\n", - " [\n", - " ('first_level_contrast', wf_fixed_effect.wf_firstlevel.lzout.first_level_contrast),\n", - " ('fx_effect_size_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_contrast_path_dict),\n", - " ('fx_effect_variance_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_variance_path_dict),\n", - " ('fx_t_test_list', wf_fixed_effect.get_fixed_effcts.lzout.fixed_fx_ttest_path_dict),\n", - " ]\n", - ")\n", "\n", - "print(wf_fixed_effect.lzout.first_level_contrast)" + " # specify output\n", + " return (\n", + " wf_firstlevel.first_level_contrast,\n", + " get_fixed_effcts.fixed_fx_contrast_path_dict,\n", + " get_fixed_effcts.fixed_fx_variance_path_dict,\n", + " get_fixed_effcts.fixed_fx_ttest_path_dict,\n", + " )\n" ] }, { @@ -600,13 +562,10 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'n_subj': int, 'return': {'design_matrix': ty.Any}}\n", - ")\n", - "def get_secondlevel_dm(n_subj):\n", + "@python.define(outputs={'design_matrix': ty.Any})\n", + "def GetSecondLevelDm(n_subj: int):\n", 
" t1 = datetime.datetime.now()\n", - " print(f\"\\nGet secondlevel design matrix ...\\n\")\n", + " print(\"\\nGet secondlevel design matrix ...\\n\")\n", " design_matrix = pd.DataFrame([1] * n_subj,columns=['intercept'])\n", " return design_matrix" ] @@ -628,13 +587,9 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'firstlevel_stats_list': list, 'design_matrix': ty.Any, 'firstlevel_contrast':list, \n", - " 'return': {'secondlevel_mask': ty.Any, 'stat_maps_dict': dict}}\n", - ")\n", - "def secondlevel_estimation(firstlevel_stats_list, design_matrix, firstlevel_contrast): \n", - " print(f\"\\nStart secondlevel estimation ...\\n\")\n", + "@python.define(outputs={'secondlevel_mask': ty.Any, 'stat_maps_dict': dict})\n", + "def SecondLevelEstimation(firstlevel_stats_list: list, design_matrix, firstlevel_contrast: list): \n", + " print(\"\\nStart secondlevel estimation ...\\n\")\n", " stat_maps_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", " print(' Contrast % 2i out of %i: %s' % (\n", @@ -674,42 +629,38 @@ "outputs": [], "source": [ "# initiate the first-level GLM workflow\n", - "wf_secondlevel = Workflow(\n", - " name='wf_secondlevel',\n", - " input_spec=[\n", - " 'n_subj',\n", - " 'firstlevel_stats_list', \n", - " 'firstlevel_contrast',\n", - " 'n_perm',\n", - " ],\n", + "@workflow.define(\n", + " outputs=[\n", + " 'second_level_designmatrix',\n", + " 'second_level_mask',\n", + " 'second_level_stats_map',\n", + " ]\n", ")\n", + "def SecondLevelWorkflow(\n", + " n_subj,\n", + " firstlevel_stats_list, \n", + " firstlevel_contrast,\n", + " n_perm,\n", + "):\n", "\n", - "# add task - get_secondlevel_dm\n", - "wf_secondlevel.add(\n", - " get_secondlevel_dm(\n", - " name = \"get_secondlevel_dm\",\n", - " n_subj = wf_secondlevel.lzin.n_subj, \n", - " )\n", - ")\n", + " # add task - get_secondlevel_dm\n", + " get_secondlevel_dm = workflow.add(GetSecondLevelDm(n_subj=n_subj))\n", "\n", - "# add task - secondlevel_estimation\n", - "wf_secondlevel.add(\n", - " secondlevel_estimation(\n", - " name = \"secondlevel_estimation\",\n", - " firstlevel_stats_list = wf_secondlevel.lzin.firstlevel_stats_list, \n", - " design_matrix = wf_secondlevel.get_secondlevel_dm.lzout.design_matrix, \n", - " firstlevel_contrast = wf_secondlevel.lzin.firstlevel_contrast\n", + " # add task - secondlevel_estimation\n", + " secondlevel_estimation = workflow.add(\n", + " SecondLevelEstimation(\n", + " firstlevel_stats_list=firstlevel_stats_list, \n", + " design_matrix=get_secondlevel_dm.design_matrix, \n", + " firstlevel_contrast=firstlevel_contrast\n", + " )\n", " )\n", - ")\n", "\n", - "# specify output\n", - "wf_secondlevel.set_output(\n", - " [\n", - " ('second_level_designmatrix', wf_secondlevel.get_secondlevel_dm.lzout.design_matrix),\n", - " ('second_level_mask', wf_secondlevel.secondlevel_estimation.lzout.secondlevel_mask),\n", - " ('second_level_stats_map', wf_secondlevel.secondlevel_estimation.lzout.stat_maps_dict)\n", - " ]\n", - ")" + " # specify output\n", + " return (\n", + " get_secondlevel_dm.design_matrix,\n", + " secondlevel_estimation.secondlevel_mask,\n", + " secondlevel_estimation.stat_maps_dict\n", + " )" ] }, { @@ -744,12 +695,8 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'stat_maps_dict': dict, 'threshold': float, 'cluster_threshold': int, \n", - " 'return': {'thresholded_map_dict': dict, 
'plot_contrast_dict': dict}}\n", - ")\n", - "def cluster_thresholding(stat_maps_dict, threshold, cluster_threshold):\n", + "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", + "def ClusterThresholding(stat_maps_dict: dict, threshold: float, cluster_threshold: int):\n", " t1 = datetime.datetime.now()\n", " print(\"\\nStart cluster thresholding ...\\n\")\n", " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", @@ -798,12 +745,8 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'stat_maps_dict': dict, 'alpha': float, 'height_control': str, \n", - " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", - ")\n", - "def multiple_comparison(stat_maps_dict, alpha, height_control):\n", + "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", + "def MultipleComparison(stat_maps_dict: dict, alpha: float, height_control: str):\n", " print(\"\\nStart multiple comparison ...\\n\")\n", " from nilearn.glm import threshold_stats_img\n", " from nilearn.plotting import plot_stat_map\n", @@ -850,13 +793,8 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'stat_maps_dict': dict, \n", - " 'secondlevel_mask': ty.Any,\n", - " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", - ")\n", - "def parametric_test(stat_maps_dict, secondlevel_mask):\n", + "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", + "def ParametricTest(stat_maps_dict: dict, secondlevel_mask: ty.Any):\n", " print(\"\\nStart parametric test ...\\n\")\n", " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", @@ -907,13 +845,15 @@ "metadata": {}, "outputs": [], "source": [ - "@pydra.mark.task\n", - "@pydra.mark.annotate(\n", - " {'firstlevel_stats_list': list, 'smoothing_fwhm':float,'design_matrix': ty.Any, 'firstlevel_contrast': list, 'n_perm': int, \n", - " 'return': {'thresholded_map_dict': dict, 'plot_contrast_dict': dict}}\n", - ")\n", - "def nonparametric_test(firstlevel_stats_list, smoothing_fwhm, design_matrix, firstlevel_contrast, n_perm):\n", - " print(f\"\\nStart nonparametric test ...\\n\")\n", + "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", + "def NonparametricTest(\n", + " firstlevel_stats_list: list,\n", + " smoothing_fwhm: float,\n", + " design_matrix: ty.Any,\n", + " firstlevel_contrast: list,\n", + " n_perm: int\n", + "):\n", + " print(\"\\nStart nonparametric test ...\\n\")\n", " thresholded_map_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", " plot_contrast_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", @@ -960,75 +900,99 @@ "metadata": {}, "outputs": [], "source": [ - "wf = Workflow(\n", - " name='twolevel_glm',\n", - " input_spec=['n_subj'],\n", + "@workflow.define(\n", + " outputs={\n", + " 'cluster_thresholded_map_dict': dict,\n", + " 'cluster_plot_contrast_dict': dict,\n", + " 'mult_comp_thresholded_map_dict': dict,\n", + " 'mult_comp_plot_contrast_dict': dict,\n", + " 'parametric_thresholded_map_dict': dict,\n", + " 'parametric_plot_contrast_dict': dict,\n", + " 'nonparametric_thresholded_map_dict': dict,\n", + " 'nonparametric_plot_contrast_dict': dict,\n", + " }\n", ")\n", + "def TwoLevelGLM(\n", + " n_subj: int,\n", + " 
run_id: list[int] = [1,2],\n", + " tr: float = 2.3,\n", + " n_scans: int = 300,\n", + " hrf_model: str = 'glover',\n", + " smoothing_fwhm: float = 5.0,\n", + "):\n", "\n", - "wf.inputs.n_subj = 2\n", - "\n", - "# randomly choose subjects\n", - "wf_fixed_effect.inputs.subj_id = random.sample(range(1,17), wf.inputs.n_subj)\n", - "wf_fixed_effect.inputs.run_id =[1,2]\n", - "wf_fixed_effect.inputs.tr = 2.3\n", - "wf_fixed_effect.inputs.n_scans = 300\n", - "wf_fixed_effect.inputs.hrf_model = 'glover'\n", - "wf_fixed_effect.inputs.smoothing_fwhm = 5.0\n", - "wf.add(wf_fixed_effect)\n", - "\n", - "wf_secondlevel.inputs.n_subj = wf.inputs.n_subj\n", - "wf_secondlevel.inputs.firstlevel_stats_list = wf.wf_fixed_effect.lzout.fx_t_test_list \n", - "wf_secondlevel.inputs.firstlevel_contrast = wf.wf_fixed_effect.lzout.first_level_contrast\n", - "wf.add(wf_secondlevel)\n", + " # randomly choose subjects\n", + " fixed_effect = workflow.add(\n", + " FixedEffectWorkflow(\n", + " run_id=run_id,\n", + " tr=tr,\n", + " n_scans=n_scans,\n", + " hrf_model=hrf_model,\n", + " smoothing_fwhm=smoothing_fwhm,\n", + " )\n", + " .split(subj_id=random.sample(range(1,17), n_subj))\n", + " .combine('subj_id')\n", + " )\n", "\n", - "# add task - cluster_thresholding\n", - "wf.add(\n", - " cluster_thresholding(\n", - " name = \"cluster_thresholding\",\n", - " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", - " threshold = 3.29, \n", - " cluster_threshold = 10\n", + " secondlevel = workflow.add(\n", + " SecondLevelWorkflow(\n", + " n_subj = n_subj,\n", + " firstlevel_stats_list=fixed_effect.fx_t_test_list,\n", + " firstlevel_contrast=fixed_effect.first_level_contrast,\n", + " )\n", + " )\n", + " \n", + " \n", + " # add task - cluster_thresholding\n", + " cluster_thresholding = workflow.add(\n", + " ClusterThresholding(\n", + " stat_maps_dict=secondlevel.second_level_stats_map, \n", + " threshold=3.29, \n", + " cluster_threshold=10\n", + " )\n", " )\n", - ")\n", "\n", "\n", - "# add task - multiple_comparison\n", - "wf.add(\n", - " multiple_comparison(\n", - " name = \"multiple_comparison\",\n", - " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", - " alpha = 0.05,\n", - " height_control = 'fdr'\n", + " # add task - multiple_comparison\n", + " multiple_comparison = workflow.add(\n", + " MultipleComparison(\n", + " stat_maps_dict=secondlevel.second_level_stats_map, \n", + " alpha=0.05,\n", + " height_control='fdr'\n", + " )\n", " )\n", - ")\n", "\n", - "# add task - parametric_test\n", - "wf.add(\n", - " parametric_test(\n", - " name = \"parametric_test\",\n", - " stat_maps_dict = wf.wf_secondlevel.lzout.second_level_stats_map, \n", - " secondlevel_mask = wf.wf_secondlevel.lzout.second_level_mask\n", + " # add task - parametric_test\n", + " parametric_test = workflow.add(\n", + " ParametricTest(\n", + " stat_maps_dict=secondlevel.second_level_stats_map, \n", + " secondlevel_mask=secondlevel.second_level_mask\n", + " )\n", + " \n", " )\n", - " \n", - ")\n", "\n", - "# add task - nonparametric_test\n", - "wf.add(\n", - " nonparametric_test(\n", - " name = \"nonparametric_test\",\n", - " firstlevel_stats_list = wf.wf_fixed_effect.lzout.fx_t_test_list,\n", - " smoothing_fwhm = 5.0,\n", - " design_matrix = wf.wf_secondlevel.lzout.second_level_designmatrix,\n", - " firstlevel_contrast = wf.wf_fixed_effect.lzout.first_level_contrast,\n", - " n_perm = 100,\n", + " # add task - nonparametric_test\n", + " nonparametric_test = workflow.add(\n", + " NonparametricTest(\n", + " 
firstlevel_stats_list=fixed_effect.fx_t_test_list,\n", + " smoothing_fwhm=5.0,\n", + " design_matrix=secondlevel.second_level_designmatrix,\n", + " firstlevel_contrast=fixed_effect.first_level_contrast,\n", + " n_perm=100,\n", + " )\n", " )\n", - ")\n", "\n", - "wf.set_output(\n", - " [\n", - " ('second_level_stats_map', wf.wf_secondlevel.lzout.second_level_stats_map) \n", - " ]\n", - ")" + " return (\n", + " secondlevel.second_level_stats_map,\n", + " cluster_thresholding.thresholded_map_dict,\n", + " cluster_thresholding.plot_contrast_dict,\n", + " multiple_comparison.thresholded_map_dict,\n", + " multiple_comparison.plot_contrast_dict,\n", + " parametric_test.thresholded_map_dict,\n", + " parametric_test.plot_contrast_dict,\n", + " nonparametric_test.thresholded_map_dict,\n", + " nonparametric_test.plot_contrast_dict\n", + " )\n" ] }, { @@ -1052,6 +1016,8 @@ "source": [ "from pydra import Submitter\n", "\n", + "wf = TwoLevelGLM(n_subj=2)\n", + "\n", "with Submitter(plugin='cf', n_procs=1) as submitter:\n", " submitter(wf)\n", "\n", From 02f6a1cecc995e6986dd55582923fb916120187f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 15:04:42 +1100 Subject: [PATCH 116/342] more touching up of documentation --- new-docs/source/index.rst | 1 + new-docs/source/tutorial/canonical-form.ipynb | 172 ++++++++++++ .../source/tutorial/getting-started.ipynb | 52 +++- new-docs/source/tutorial/python.ipynb | 260 ++++++++++-------- new-docs/source/tutorial/shell.ipynb | 33 +-- new-docs/source/tutorial/tst.py | 10 + new-docs/source/tutorial/workflow.ipynb | 36 +-- pydra/design/base.py | 2 + pydra/design/python.py | 2 +- pydra/engine/specs.py | 9 +- pydra/utils/typing.py | 3 +- 11 files changed, 376 insertions(+), 204 deletions(-) create mode 100644 new-docs/source/tutorial/canonical-form.ipynb create mode 100644 new-docs/source/tutorial/tst.py diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 75f2223241..cf41923f01 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -88,6 +88,7 @@ Indices and tables tutorial/python tutorial/shell tutorial/workflow + tutorial/canonical-form .. 
toctree:: diff --git a/new-docs/source/tutorial/canonical-form.ipynb b/new-docs/source/tutorial/canonical-form.ipynb new file mode 100644 index 0000000000..4fd063a694 --- /dev/null +++ b/new-docs/source/tutorial/canonical-form.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Canonical (dataclass) task form" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python-task definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "from pydra.engine.specs import PythonDef, PythonOutputs\n", + "from pydra.design import python\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shell-task definitions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from fileformats import generic\n", + "from pydra.design import shell\n", + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "\n", + "@shell.define\n", + "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[generic.FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " class Outputs(ShellOutputs):\n", + "\n", + " @staticmethod\n", + " def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + " out_file: generic.File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", + "pprint(fields_dict(CpWithSize.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workflow definitions\n", + "\n", + "Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. 
In this case the workflow constructor should be a static method of the dataclasss named `constructor`.\n", + "\n", + "This form also lends itself to defining custom converters and validators on the fields" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python, workflow\n", + "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b\n", + "\n", + "\n", + "@workflow.define\n", + "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", + "\n", + " @staticmethod\n", + " def a_converter(value):\n", + " if value is None:\n", + " return value\n", + " return float(value)\n", + "\n", + " a: int\n", + " b: float = workflow.arg(\n", + " help_string=\"A float input\",\n", + " converter=a_converter,\n", + " )\n", + "\n", + " @staticmethod\n", + " def constructor(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out\n", + "\n", + " class Outputs(WorkflowOutputs):\n", + " out: float" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index b075fd448e..45022032af 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -25,7 +25,7 @@ "* instantiate it with appropriate parameters\n", "* \"call\" resulting object (i.e. `my_task(...)`) to execute it as you would a function \n", "\n", - "To demonstrate with a toy example of loading a JSON file with the\n", + "To demonstrate with an example of loading a JSON file with the\n", "`pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" ] }, @@ -68,10 +68,29 @@ "load_json = LoadJson(file=json_file)\n", "\n", "# Run the task to load the JSON file\n", - "result = load_json()\n", + "outputs = load_json()\n", "\n", "# Access the loaded JSON output contents and check they match original\n", - "assert result.output.out == JSON_CONTENTS" + "assert outputs.out == JSON_CONTENTS" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to access a richer `Result` object you can use a Submitter object to execute the task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine import Submitter\n", + "\n", + "with Submitter(plugin='cf', n_procs=1) as submitter:\n", + " result = submitter(load_json)" ] }, { @@ -133,10 +152,10 @@ "mrgrid = MrGrid(voxel=0.5).split(input=nifti_dir.iterdir())\n", "\n", "# Run the task to resample all NIfTI files\n", - "result = mrgrid()\n", + "outputs = mrgrid()\n", "\n", "# Print the locations of the output files\n", - "print(\"\\n\".join(str(p) for p in result.output.output))" + "print(\"\\n\".join(str(p) for p in outputs.output))" ] }, { @@ -166,8 +185,12 @@ " voxel=VOXEL_SIZES\n", ")\n", "\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", "# Run the task to resample all NIfTI files with different voxel sizes\n", - "result = mrgrid_varying_vox_sizes(cache_dir=test_dir / \"cache\")" + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox_sizes)" ] }, { @@ -196,20 
+219,24 @@ " voxel=VOXEL_SIZES\n", ")\n", "\n", + "\n", "# Result from previous run is reused as the task and inputs are identical\n", - "result1 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", "\n", "# Check that the output directory is the same for both runs\n", - "assert result1.output_dir == result.output_dir\n", + "assert result2.output_dir == result1.output_dir\n", "\n", "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", "\n", "# Result from previous run is reused as the task and inputs are identical\n", - "result2 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox_sizes2)\n", "\n", "# The output directory will be different as the inputs are now different\n", - "assert result2.output_dir != result.output_dir" + "assert result3.output_dir != result1.output_dir" ] }, { @@ -241,10 +268,11 @@ ")\n", "\n", "# Result from previous run is reused as the task and inputs are identical\n", - "result3 = mrgrid_varying_vox_sizes2(cache_dir=test_dir / \"cache\")\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox_sizes2)\n", "\n", "# Check that the output directory is the same for both runs\n", - "assert result3.output_dir == result.output_dir" + "assert result4.output_dir == result1.output_dir" ] }, { diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index a80458a06e..d79766f9db 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -4,103 +4,173 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Python-tasks" + "# Python-tasks\n", + "\n", + "Python task definitions are Python functions that are parameterised in a separate step before\n", + "they are executed or added to a workflow.\n", + "\n", + "## Define decorator\n", + "\n", + "The simplest way to define a Python task is to decorate a function with `pydra.design.python.define`" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "# Note that we use CamelCase as the return of the decorator is a class\n", + "@python.define\n", + "def MyFirstTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting task-definition class can then be parameterized (instantiated) and\n", + "executed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev103+g1a6b067c.d20241228\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "funcOutputs(out=2.0)\n" + "3.0\n" ] } ], "source": [ - "from pydra.design import python\n", - "\n", + "# Instantiate the task, setting all parameters\n", + "my_first_task = MyFirstTaskDef(a=1, b=2.0)\n", "\n", - "def func(a: int) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * 2\n", + "# Execute the task\n", + "outputs = my_first_task()\n", "\n", - "SampleDef = python.define(func)\n", - "\n", - "spec = SampleDef(a=1)\n", - "result = spec()\n", - "print(result.output)" + "print(outputs.out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### With typing" + "By default, the name of the output field for a function with only one output is `out`. To\n", + "name this something else, in the case where there are multiple output fields, the `outputs`\n", + "argument can be provided to `python.define`\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NamedOutputTaskOutputs(c=3, d=1)\n" + ] + } + ], "source": [ + "@python.define(outputs=[\"c\", \"d\"])\n", + "def NamedOutputTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b\n", + "\n", + "named_output_task = NamedOutputTaskDef(a=2, b=1)\n", "\n", - "def func(a: int, k: float = 2.0) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * k\n", + "outputs = named_output_task()\n", "\n", - "SampleDef = python.define(func)\n" + "print(outputs)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Augment with explicit inputs and outputs\n" + "The input and output field attributes automatically extracted from the function, explicit\n", + "attributes can be augmented" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "from decimal import Decimal\n", - "\n", - "def func(a: int) -> float:\n", - " \"\"\"Sample function with inputs and outputs\"\"\"\n", - " return a * 2\n", - "\n", - "SampleDef = python.define(\n", - " func,\n", - " inputs={\"a\": python.arg(help_string=\"The argument to be doubled\")},\n", - " outputs={\"b\": python.out(help_string=\"the doubled output\", type=Decimal)},\n", - ")" + "@python.define(\n", + " inputs={\"a\": python.arg(allowed_values=[1, 2, 3]), \"b\": python.arg(default=10.0)},\n", + " outputs={\n", + " \"c\": python.out(type=float, help_string=\"the sum of the inputs\"),\n", + " \"d\": python.out(type=float, help_string=\"the difference of the inputs\"),\n", + " },\n", + ")\n", + "def AugmentedTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Decorated_function" + "## Type annotations\n", + "\n", + "If provided, type annotations are included in the task definition, and are checked at\n", + "the time of parameterisation." 
] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Type error caught: Incorrect type for field in 'a' field of MyTypedTask interface : 1.5 is not of type (and cannot be coerced to it)\n" + ] + } + ], "source": [ - "# Note we use CamelCase as the function is translated to a class\n", + "from pydra.design import python\n", "\n", - "@python.define(outputs=[\"c\", \"d\"])\n", - "def SampleDef(a: int, b: float) -> tuple[float, float]:\n", + "# Note that we use CamelCase as the function is translated to a class\n", + "\n", + "@python.define\n", + "def MyTypedTask(a: int, b: float) -> float:\n", " \"\"\"Sample function for testing\"\"\"\n", - " return a + b, a * b" + " return a + b\n", + "\n", + "try:\n", + " # 1.5 is not an integer so this should raise a TypeError\n", + " my_typed_task = MyTypedTask(a=1.5, b=2.0)\n", + "except TypeError as e:\n", + " print(f\"Type error caught: {e}\")\n", + "else:\n", + " assert False, \"Expected a TypeError\"\n", + "\n", + "# While 2 is an integer, it can be implicitly coerced to a float\n", + "my_typed_task = MyTypedTask(a=1, b=2)" ] }, { @@ -108,7 +178,12 @@ "metadata": {}, "source": [ "\n", - "## Pull helps from docstring" + "## Docstring parsing\n", + "\n", + "Instead of explicitly providing help strings and output names in `inputs` and `outputs`\n", + "arguments, if the function describes the its inputs and/or outputs in the doc string, \n", + "in either reST, Google or NumpyDoc style, then they will be extracted and included in the\n", + "input or output fields\n" ] }, { @@ -132,8 +207,8 @@ "from pprint import pprint\n", "from pydra.engine.helpers import fields_dict\n", "\n", - "@python.define(outputs=[\"c\", \"d\"])\n", - "def SampleDef(a: int, b: float) -> tuple[float, float]:\n", + "@python.define\n", + "def DocStrDef(a: int, b: float) -> tuple[float, float]:\n", " \"\"\"Sample function for testing\n", "\n", " Args:\n", @@ -147,132 +222,77 @@ " \"\"\"\n", " return a + b, a * b\n", "\n", - "pprint(fields_dict(SampleDef))\n", - "pprint(fields_dict(SampleDef.Outputs))" + "pprint(fields_dict(DocStrDef))\n", + "pprint(fields_dict(DocStrDef.Outputs))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Dataclass form" + "## Wrapping external functions\n", + "\n", + "Like all decorators, `python.define` is just a function, so can also be used to convert\n", + "a function that is defined separately into a Python task definition." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=2.0, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + "[3.5]\n" ] } ], "source": [ + "import numpy as np\n", "\n", - "@python.define\n", - "class SampleDef:\n", - " \"\"\"Sample class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0\n", - "\n", - " class Outputs:\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", + "NumpyCorrelate = python.define(np.correlate)\n", "\n", - " c: float\n", - " d: float\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a * b\n", + "outputs = numpy_correlate()\n", "\n", - "pprint(fields_dict(SampleDef))\n", - "pprint(fields_dict(SampleDef.Outputs))" + "print(outputs.out)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Canonical form (to work with static type-checking)" + "Like with decorated functions, input and output fields can be explicitly augmented via\n", + "the `inputs` and `outputs` arguments" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + "[3.5]\n" ] } ], "source": [ - "from pydra.engine.specs import PythonDef, PythonOutputs\n", - "\n", - "@python.define\n", - "class SampleDef(PythonDef[\"SampleDef.Outputs\"]):\n", - " \"\"\"Sample class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " 
\"\"\"\n", - "\n", - " a: int\n", - " b: float\n", + "import numpy as np\n", "\n", - " @python.outputs\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", + "NumpyCorrelate = python.define(np.correlate, outputs=[\"correlation\"])\n", "\n", - " c: float\n", - " d: float\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a * b\n", + "outputs = numpy_correlate()\n", "\n", - "pprint(fields_dict(SampleDef))\n", - "pprint(fields_dict(SampleDef.Outputs))" + "print(outputs.correlation)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index fa68492374..96e8a32f74 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -356,15 +356,6 @@ "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dataclass form\n", - "\n", - "Like with Python tasks, shell-tasks can also be specified in dataclass-form by using `shell.define` as a decorator." - ] - }, { "cell_type": "code", "execution_count": 9, @@ -388,29 +379,7 @@ ] } ], - "source": [ - "from fileformats.generic import FsObject, Directory\n", - "from pydra.utils.typing import MultiInputObj\n", - "\n", - "@shell.define\n", - "class CpWithSize:\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\")\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " class Outputs:\n", - " out_file: File\n", - " out_file_size: int = shell.out(callable=get_file_size)\n", - "\n", - "\n", - "pprint(fields_dict(CpWithSize))\n", - "pprint(fields_dict(CpWithSize.Outputs))" - ] + "source": [] }, { "cell_type": "markdown", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py new file mode 100644 index 0000000000..a32d86eff0 --- /dev/null +++ b/new-docs/source/tutorial/tst.py @@ -0,0 +1,10 @@ +import numpy as np +from pydra.design import python + +NumpyCorrelate = python.define(np.correlate) + +numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5]) + +outputs = numpy_correlate() + +print(outputs.out) diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 46e14099e7..7d3bd9d79b 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -194,46 +194,14 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Dataclass form\n", - "\n", - "Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. 
In this case the workflow constructor should be a static method of the dataclasss named `constructor`.\n", - "\n", - "This form also lends itself to defining custom converters and validators on the fields" - ] + "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], - "source": [ - "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", - "\n", - "def a_converter(value):\n", - " if value is None:\n", - " return value\n", - " return float(value)\n", - "\n", - "@workflow.define\n", - "class LibraryWorkflow(WorkflowDef[\"MyLibraryWorkflow.Outputs\"]):\n", - "\n", - " a: int\n", - " b: float = workflow.arg(\n", - " help_string=\"A float input\",\n", - " converter=a_converter,\n", - " )\n", - "\n", - " @staticmethod\n", - " def constructor(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out\n", - "\n", - " @workflow.outputs\n", - " class Outputs(WorkflowOutputs):\n", - " out: float" - ] + "source": [] }, { "cell_type": "markdown", diff --git a/pydra/design/base.py b/pydra/design/base.py index 8e9b42435a..09f5dbc230 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -817,6 +817,8 @@ def extract_function_inputs_and_outputs( f"of the return types ({return_types})" ) output_types = dict(zip(outputs, return_types)) + else: + output_types = {o: ty.Any for o in outputs} if isinstance(outputs, dict): for output_name, output in outputs.items(): if isinstance(output, Out) and output.type is ty.Any: diff --git a/pydra/design/python.py b/pydra/design/python.py index febc74b98a..afd1a03cd7 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -134,7 +134,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: klass, arg, out, auto_attribs ) else: - if not inspect.isfunction(wrapped): + if not isinstance(wrapped, ty.Callable): raise ValueError( f"wrapped must be a class or a function, not {wrapped!r}" ) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index a30861ca34..5130296ab2 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -189,7 +189,7 @@ def __call__( messengers=None, rerun=False, **kwargs, - ) -> "Result[OutputsType]": + ) -> OutputsType: """Create a task from this definition and execute it to produce a result. 
Parameters @@ -217,8 +217,8 @@ def __call__( Returns ------- - Result - The result of the task + Outputs + The output interface of the task """ self._check_rules() task = self.Task( @@ -233,7 +233,8 @@ def __call__( messengers=messengers, rerun=rerun, ) - return task(**kwargs) + result = task(**kwargs) + return result.output def __iter__(self) -> ty.Generator[str, None, None]: """Iterate through all the names in the definition""" diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 976f59b431..3212771434 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -10,7 +10,7 @@ import attr from pydra.utils import add_exc_note from fileformats import field, core, generic -from pydra.engine.helpers import is_lazy + try: from typing import get_origin, get_args @@ -214,6 +214,7 @@ def __call__(self, obj: ty.Any) -> T: if the coercion is not possible, or not specified by the `coercible`/`not_coercible` parameters, then a TypeError is raised """ + from pydra.engine.helpers import is_lazy coerced: T if obj is attr.NOTHING: From 45022609905a2409b888b39076a295a161c4c5eb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:29:21 +1100 Subject: [PATCH 117/342] finished python task and canonical form tuts --- .../source/examples/first-level-glm.ipynb | 4 +- new-docs/source/examples/two-level-glm.ipynb | 4 +- new-docs/source/index.rst | 3 + .../source/tutorial/advanced-execution.ipynb | 186 +++++++++++++++++- new-docs/source/tutorial/canonical-form.ipynb | 135 ++++++++++++- .../source/tutorial/getting-started.ipynb | 91 +-------- new-docs/source/tutorial/python.ipynb | 4 +- new-docs/source/tutorial/tst.py | 10 - 8 files changed, 319 insertions(+), 118 deletions(-) delete mode 100644 new-docs/source/tutorial/tst.py diff --git a/new-docs/source/examples/first-level-glm.ipynb b/new-docs/source/examples/first-level-glm.ipynb index aecba6e5dd..914ac6b9e3 100644 --- a/new-docs/source/examples/first-level-glm.ipynb +++ b/new-docs/source/examples/first-level-glm.ipynb @@ -5,7 +5,7 @@ "id": "c8149a94", "metadata": {}, "source": [ - "# First Level GLM (from Nilearn)" + "# One-Level GLM" ] }, { @@ -13,7 +13,7 @@ "id": "b54b132a", "metadata": {}, "source": [ - "In this tutorial, we will go through a simple workflow of the first level general linear modeling with a BIDS dataset from openneuro. This analysis is only performed on **one** subject.\n", + "In this tutorial, which is adapted from the Nilearn docs, we will go through a simple workflow of the first level general linear modeling with a BIDS dataset from openneuro. This analysis is only performed on **one** subject.\n", "\n", "This tutorial is based on the [Nilearn GLM tutorial](https://nilearn.github.io/stable/auto_examples/04_glm_first_level/plot_bids_features.html#sphx-glr-auto-examples-04-glm-first-level-plot-bids-features-py)." ] diff --git a/new-docs/source/examples/two-level-glm.ipynb b/new-docs/source/examples/two-level-glm.ipynb index 5886d7335f..4285234d37 100644 --- a/new-docs/source/examples/two-level-glm.ipynb +++ b/new-docs/source/examples/two-level-glm.ipynb @@ -5,7 +5,7 @@ "id": "058a991d", "metadata": {}, "source": [ - "# Two-Level GLM (from Nilearn)" + "# Two-Level GLM" ] }, { @@ -13,7 +13,7 @@ "id": "2b4f98e0", "metadata": {}, "source": [ - "In this tutorial, we demonstrate how to write pydra tasks for the first level (subject-level) GLM and the second level (group-level) GLM in Nilearn. We use the data from [Balloon Analog Risk-taking Task](https://openneuro.org/datasets/ds000001/versions/1.0.0). 
\n", + "In this tutorial, which is adapted from the Nilearn docs, we demonstrate how to write pydra tasks for the first level (subject-level) GLM and the second level (group-level) GLM in Nilearn. We use the data from [Balloon Analog Risk-taking Task](https://openneuro.org/datasets/ds000001/versions/1.0.0). \n", "Basic information about this dataset:\n", "- 16 subjects\n", "- 3 runs\n", diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index cf41923f01..72318fa64d 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -54,11 +54,14 @@ Tutorials * :ref:`Python-tasks` * :ref:`Shell-tasks` * :ref:`Workflows` +* :ref:`Canonical (dataclass) task form` Examples -------- * :ref:`T1w MRI preprocessing` +* :ref:`One-level GLM` +* :ref:`Two-Level GLM` How-to Guides ------------- diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 63a7673daf..41d7025540 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -27,7 +27,169 @@ "source": [ "## Caching results\n", "\n", - "See [Caching and hashing](../explanation/hashing-caching.html) for more details." + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the output directory for\n", + "the task within the specified cache directory. Therefore, if you use the same cache\n", + "directory between runs and in a subsequent run the same task is executed with the same\n", + "inputs then the location of its output directory will also be the same, and the outputs\n", + "generated by the previous run are reused.\n", + "\n", + "For example, using the MrGrid example from the [Getting Started Tutorial](./getting-started.html)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' (/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' 
(/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import tempfile\n", + "from fileformats.medimage import Nifti\n", + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.mrtrix3 import MrGrid\n", + "\n", + "# Make directory filled with nifti files\n", + "test_dir = Path(tempfile.mkdtemp())\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "for i in range(10):\n", + " Nifti.sample(nifti_dir, seed=i)\n", + "\n", + "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", + "\n", + "mrgrid_varying_vox_sizes = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Run the task to resample all NIfTI files with different voxel sizes\n", + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox_sizes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'MrGrid' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mrgrid_varying_vox_sizes2 \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 2\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 4\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m submitter \u001b[38;5;241m=\u001b[39m Submitter(cache_dir\u001b[38;5;241m=\u001b[39mtest_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Result from previous run is reused as the task and inputs are identical\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'MrGrid' is not defined" + ] + } + ], + "source": [ + "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result2.output_dir == result1.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = 
submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result3.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nifti_dir' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Rename a NIfTI file within the test directory\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m first_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 3\u001b[0m first_file\u001b[38;5;241m.\u001b[39mrename(first_file\u001b[38;5;241m.\u001b[39mwith_name(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst.nii.gz\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 5\u001b[0m mrgrid_varying_vox_sizes3 \u001b[38;5;241m=\u001b[39m MrGrid()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 6\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 8\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 9\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" + ] + } + ], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", + "\n", + "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result4.output_dir == result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [Caching and hashing](../explanation/hashing-caching.html) for more details on how inputs\n", + "are hashed for caching and issues to consider." ] }, { @@ -36,6 +198,8 @@ "source": [ "## Environments (containers)\n", "\n", + "Work in progress...\n", + "\n", "See [Containers and Environments](../explanation/environments.rst) for more details." ] }, @@ -43,7 +207,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Provenance" + "## Provenance\n", + "\n", + "Work in progress..." 
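Until the provenance section is written up, here is a minimal, hedged sketch of what enabling audit tracking might look like. The `audit_flags`/`messengers` argument names mirror parameters visible in `Task.__init__` and `TaskDef.__call__` elsewhere in this patch series, and the messenger import path is carried over from earlier Pydra releases; treat all of them as assumptions rather than a settled API.

```python
# Hedged sketch only: record provenance while executing a task.
from pydra.design import python
from pydra.utils.messenger import AuditFlag, PrintMessenger  # assumed import path

@python.define
def Greet(name: str) -> str:
    """Toy task used only to exercise the audit options"""
    return f"Hello {name}"

outputs = Greet(name="pydra")(
    audit_flags=AuditFlag.ALL,    # record both provenance and resource usage
    messengers=PrintMessenger(),  # print audit messages to stdout
)
```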
] }, { @@ -53,8 +219,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/new-docs/source/tutorial/canonical-form.ipynb b/new-docs/source/tutorial/canonical-form.ipynb index 4fd063a694..6a957aa8fa 100644 --- a/new-docs/source/tutorial/canonical-form.ipynb +++ b/new-docs/source/tutorial/canonical-form.ipynb @@ -4,14 +4,34 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Canonical (dataclass) task form" + "# Canonical (dataclass) task form\n", + "\n", + "Under the hood, all Python, shell and workflow task definitions generated by the\n", + "`pydra.design.*.define` decorators/functions are translated to\n", + "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", + "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", + "in the [Python-tasks](./python.html), [Shell-tasks](./shell.html) and [Workflow](./workflow.html)\n", + "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", + "for linters follow. Therefore, when designing task definitions to be used by third\n", + "parties (e.g. `pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", + "explicit, \"canonical\" dataclass form.\n", + "\n", + "The syntax of the canonical form is close to that used by the\n", + "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", + "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", + "form will be able to be statically type-checked by [MyPy](https://mypy-lang.org/)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Python-task definitions" + "## Python-task definitions\n", + "\n", + "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", + "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", + "called `Outputs`. The function to be executed should be a staticmethod called `function`.\n", + "Default values can also be set directly, as with Attrs classes.\n" ] }, { @@ -27,7 +47,7 @@ "\n", "\n", "@python.define\n", - "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + "class CanonicalPythonDef:\n", " \"\"\"Canonical Python task definition class for testing\n", "\n", " Args:\n", @@ -37,9 +57,57 @@ " \"\"\"\n", "\n", " a: int\n", - " b: float = 2.0\n", + " b: float = 2.0 # set default value\n", "\n", - " class Outputs(PythonOutputs):\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To set additional attributes other than the type and default, such as `allowed_values`\n", + "and `validators`, `python.arg` and `python.out` can be used instead." 
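As a quick illustration of what these constraints buy you (a sketch, not taken from the notebook): a value outside `allowed_values` should be rejected when the definition is parameterised, in the same way the type annotations are checked above, although the exact exception type raised here is an assumption.

```python
from pydra.design import python

# Minimal sketch: constrain an input via python.arg and then try to violate it.
@python.define(inputs={"a": python.arg(allowed_values=[1, 2, 3])})
def Double(a: int) -> int:
    """Double the (constrained) input"""
    return a * 2

try:
    Double(a=7)  # 7 is not in allowed_values, so parameterisation should fail
except (TypeError, ValueError) as e:  # exact exception type is an assumption
    print(f"Rejected at parameterisation time: {e}")
```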
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import attrs.validators\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef:\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", + " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", + "\n", + " class Outputs:\n", " \"\"\"\n", " Args:\n", " c: Sum of a and b\n", @@ -51,7 +119,7 @@ "\n", " @staticmethod\n", " def function(a, b):\n", - " return a + b, a * b\n", + " return a + b, a / b\n", "\n", "pprint(fields_dict(CanonicalPythonDef))\n", "pprint(fields_dict(CanonicalPythonDef.Outputs))" @@ -61,7 +129,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Shell-task definitions" + "In order to allow static type-checkers to check the type of outputs of tasks added\n", + "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", + "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", + "`define` method implicitly). Thus the \"canonical\" is as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0 # set default value\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shell-task definitions\n", + "\n", + "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", + "attribute replaces the `function` staticmethod." ] }, { @@ -111,9 +227,8 @@ "source": [ "## Workflow definitions\n", "\n", - "Like with Python and shell tasks, it is also possible to specify workflows in \"dataclass form\" in order to be more explicit to linters, which can be worth the extra effort when creating a suite of workflows to be shared publicly. In this case the workflow constructor should be a static method of the dataclasss named `constructor`.\n", - "\n", - "This form also lends itself to defining custom converters and validators on the fields" + "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", + "but with a staticmethod called `constructor` that constructs the workflow." 
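A short usage sketch, assuming the `CanonicalWorkflowDef` class from the accompanying code cell (with its `Add` and `Mul` helper tasks) is in scope: the canonical workflow definition is parameterised and called in exactly the same way as the decorated form.

```python
# Sketch only: parameterise and run the canonical workflow definition.
wf = CanonicalWorkflowDef(a=1, b=2.0)

outputs = wf()      # runs Add then Mul as wired up in the `constructor` staticmethod
print(outputs.out)  # (1 + 2.0) * 2.0 == 6.0
```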
] }, { diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 45022032af..c87709b7c4 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -87,7 +87,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pydra.engine import Submitter\n", + "from pydra.engine.submitter import Submitter\n", "\n", "with Submitter(plugin='cf', n_procs=1) as submitter:\n", " result = submitter(load_json)" @@ -185,94 +185,7 @@ " voxel=VOXEL_SIZES\n", ")\n", "\n", - "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Run the task to resample all NIfTI files with different voxel sizes\n", - "with submitter:\n", - " result1 = submitter(mrgrid_varying_vox_sizes)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cache directories\n", - "\n", - "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", - "task and the operation to be performed. This hash is used to name the output directory for\n", - "the task within the specified cache directory. Therefore, if you use the same cache\n", - "directory between runs and in a subsequent run the same task is executed with the same\n", - "inputs then the location of its output directory will also be the same, and the outputs\n", - "generated by the previous run are reused." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result2 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result2.output_dir == result1.output_dir\n", - "\n", - "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", - "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# The output directory will be different as the inputs are now different\n", - "assert result3.output_dir != result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for file objects, the contents of the files are used to calculate the hash\n", - "not their paths. Therefore, when inputting large files there might be some additional\n", - "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", - "shouldn't need to be recalculated unless they are modified). However, this makes the\n", - "hashes invariant to file-system movement. For example, changing the name of one of the\n", - "files in the nifti directory won't invalidate the hash." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", - "\n", - "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result4 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result4.output_dir == result1.output_dir" + "print(mrgrid_varying_vox_sizes().output)" ] }, { diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index d79766f9db..8d9370c523 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -78,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [ { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py deleted file mode 100644 index a32d86eff0..0000000000 --- a/new-docs/source/tutorial/tst.py +++ /dev/null @@ -1,10 +0,0 @@ -import numpy as np -from pydra.design import python - -NumpyCorrelate = python.define(np.correlate) - -numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5]) - -outputs = numpy_correlate() - -print(outputs.out) From d71542f9b243340b82304fcc450dae660bd96cf5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:31:16 +1100 Subject: [PATCH 118/342] added dependencies required for examples --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5a6a96c414..a37aafdd1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,12 @@ doc = [ "sphinx-argparse", "nbsphinx", "ipython", + "nibabel", + "nilearn", + "pandas", "pandoc", + "numpy", + "scipy", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From 70edf858a444bc259c0f56698730e5aa8a6deba8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:37:08 +1100 Subject: [PATCH 119/342] touching up tutorials --- new-docs/source/tutorial/advanced-execution.ipynb | 4 +++- new-docs/source/tutorial/shell.ipynb | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 41d7025540..7b16a1fe29 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -18,7 +18,9 @@ "- `ConcurrentFutures`\n", "- `SLURM`\n", "- `Dask` (experimental)\n", - "- `Serial` (for debugging)" + "- `Serial` (for debugging)\n", + "\n", + "Work in progress..." 
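While this section is being completed, the following sketch shows how a worker can be selected when submitting a task, modelled on the `Submitter(plugin='cf', n_procs=...)` call used in the getting-started tutorial; the plugin strings for the other backends are assumptions based on the list above and may change in this work-in-progress API.

```python
from pydra.design import python
from pydra.engine.submitter import Submitter

@python.define
def Scale(x: float, factor: float = 2.0) -> float:
    """Toy task used to exercise the submitter"""
    return x * factor

# "cf" selects the ConcurrentFutures worker, as in the getting-started tutorial;
# identifiers for the SLURM/Dask/serial workers are assumed from the list above.
with Submitter(plugin="cf", n_procs=2) as submitter:
    result = submitter(Scale(x=3.5))
```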
] }, { diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index 96e8a32f74..8830af4e2e 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -60,7 +60,7 @@ "with open(test_file, \"w\") as f:\n", " f.write(\"Contents to be copied\")\n", "\n", - "# Parameterise the task spec\n", + "# Parameterise the task definition\n", "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", "\n", "# Print the cmdline to be run to double check\n", From 9594e7503c14a40f56c9a35b98951bd5b6a9e45b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:38:38 +1100 Subject: [PATCH 120/342] renamed spec to definition across the board --- docs/changes.rst | 2 +- docs/input_spec.rst | 6 +- docs/output_spec.rst | 4 +- new-docs/source/tutorial/shell.ipynb | 2 +- pydra/design/base.py | 4 +- pydra/design/boutiques.py | 14 +-- pydra/design/python.py | 4 +- pydra/design/tests/test_python.py | 4 +- pydra/design/workflow.py | 4 +- pydra/engine/core.py | 62 ++++++----- pydra/engine/helpers.py | 26 ++--- pydra/engine/helpers_file.py | 33 +++--- pydra/engine/helpers_state.py | 2 +- pydra/engine/specs.py | 10 +- pydra/engine/task.py | 43 ++++---- pydra/engine/tests/test_boutiques.py | 12 +-- pydra/engine/tests/test_dockertask.py | 26 ++--- pydra/engine/tests/test_helpers_file.py | 6 +- pydra/engine/tests/test_nipype1_convert.py | 10 +- pydra/engine/tests/test_shelltask.py | 100 +++++++++--------- .../engine/tests/test_shelltask_inputspec.py | 52 ++++----- pydra/engine/tests/test_singularity.py | 22 ++-- pydra/engine/tests/test_specs.py | 22 ++-- pydra/engine/tests/test_task.py | 8 +- pydra/engine/tests/test_workflow.py | 8 +- pydra/engine/workers.py | 32 +++--- pydra/engine/workflow/base.py | 24 +++-- pydra/engine/workflow/node.py | 12 +-- pydra/utils/typing.py | 2 +- 29 files changed, 289 insertions(+), 267 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index c3d2814f85..cec100a607 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -6,7 +6,7 @@ Release Notes * refactoring template formatting for ``input_spec`` * fixing issues with input fields with extension (and using them in templates) -* adding simple validators to input spec (using ``attr.validator``) +* adding simple validators to input definition (using ``attr.validator``) * adding ``create_dotfile`` for workflows, that creates graphs as dotfiles (can convert to other formats if dot available) * adding a simple user guide with ``input_spec`` description * expanding docstrings for ``State``, ``audit`` and ``messenger`` diff --git a/docs/input_spec.rst b/docs/input_spec.rst index 18679d5dee..bafaa37a82 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -35,7 +35,7 @@ Let's start from the previous example: In order to create an input definition, a new `SpecInfo` object has to be created. -The field `name` specifies the type of the spec and it should be always "Input" for +The field `name` specifies the type of the definition and it should be always "Input" for the input definition. The field `bases` specifies the "base definition" you want to use (can think about it as a `parent class`) and it will usually contains `ShellDef` only, unless you want to build on top of @@ -150,12 +150,12 @@ In the example we used multiple keys in the metadata dictionary including `help_ If `True` a path will be consider as a path inside the container (and not as a local path). 
`output_file_template` (`str`): - If provided, the field is treated also as an output field and it is added to the output spec. + If provided, the field is treated also as an output field and it is added to the output definition. The template can use other fields, e.g. `{file1}`. Used in order to create an output definition. `output_field_name` (`str`, used together with `output_file_template`) - If provided the field is added to the output spec with changed name. + If provided the field is added to the output definition with changed name. Used in order to create an output definition. `keep_extension` (`bool`, default: `True`): diff --git a/docs/output_spec.rst b/docs/output_spec.rst index 183e273339..347b8b1a55 100644 --- a/docs/output_spec.rst +++ b/docs/output_spec.rst @@ -39,7 +39,7 @@ a customized `output_spec` can be used, e.g.: Similarly as for `input_spec`, in order to create an output definition, a new `SpecInfo` object has to be created. -The field `name` specifies the type of the spec and it should be always "Output" for +The field `name` specifies the type of the definition and it should be always "Output" for the output definition. The field `bases` specifies the "base definition" you want to use (can think about it as a `parent class`) and it will usually contains `ShellOutDef` only, unless you want to build on top of @@ -69,7 +69,7 @@ The metadata dictionary for `output_spec` can include: The template can use other fields, e.g. `{file1}`. The same as in `input_spec`. `output_field_name` (`str`, used together with `output_file_template`) - If provided the field is added to the output spec with changed name. + If provided the field is added to the output definition with changed name. The same as in `input_spec`. `keep_extension` (`bool`, default: `True`): diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index 8830af4e2e..c6f4e64afa 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -331,7 +331,7 @@ " outputs={\"out_file_size\": get_file_size},\n", ")\n", "\n", - "# Parameterise the task spec\n", + "# Parameterise the task definition\n", "cp_with_size = CpWithSize(in_file=File.sample())\n", "\n", "# Run the command\n", diff --git a/pydra/design/base.py b/pydra/design/base.py index 09f5dbc230..cbb9d8e5cf 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -404,7 +404,7 @@ def make_task_spec( if name is None: raise ValueError("name must be provided if klass is not") if klass is not None and issubclass(klass, TaskDef): - raise ValueError(f"Cannot change type of spec {klass} to {spec_type}") + raise ValueError(f"Cannot change type of definition {klass} to {spec_type}") bases = tuple(bases) # Ensure that TaskDef is a base class if not any(issubclass(b, spec_type) for b in bases): @@ -497,7 +497,7 @@ def make_outputs_spec( if not any(issubclass(b, spec_type) for b in bases): if out_spec_bases := [b for b in bases if issubclass(b, TaskOutputs)]: raise ValueError( - f"Cannot make {spec_type} output spec from {out_spec_bases} bases" + f"Cannot make {spec_type} output definition from {out_spec_bases} bases" ) outputs_bases = bases + (spec_type,) if reserved_names := [n for n in outputs if n in spec_type.RESERVED_FIELD_NAMES]: diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 54050d60c0..410f855341 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -135,7 +135,7 @@ def _download_spec(zenodo_id): searcher = Searcher(zenodo_id, exact_match=True) hits 
= searcher.zenodo_search().json()["hits"]["hits"] if len(hits) == 0: - raise Exception(f"can't find zenodo spec for {zenodo_id}") + raise Exception(f"can't find zenodo definition for {zenodo_id}") elif len(hits) > 1: raise Exception(f"too many hits for {zenodo_id}") else: @@ -146,8 +146,8 @@ def _download_spec(zenodo_id): def _prepare_input_spec(bosh_spec: dict[str, ty.Any], names_subset=None): - """creating input spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec + """creating input definition from the zenodo file + if name_subset provided, only names from the subset will be used in the definition """ binputs = bosh_spec["inputs"] input_keys = {} @@ -185,13 +185,13 @@ def _prepare_input_spec(bosh_spec: dict[str, ty.Any], names_subset=None): ) input_keys[input["value-key"]] = "{" + f"{name}" + "}" if names_subset: - raise RuntimeError(f"{names_subset} are not in the zenodo input spec") + raise RuntimeError(f"{names_subset} are not in the zenodo input definition") return fields, input_keys def _prepare_output_spec(bosh_spec: dict[str, ty.Any], input_keys, names_subset=None): - """creating output spec from the zenodo file - if name_subset provided, only names from the subset will be used in the spec + """creating output definition from the zenodo file + if name_subset provided, only names from the subset will be used in the definition """ boutputs = bosh_spec["output-files"] fields = [] @@ -219,5 +219,5 @@ def _prepare_output_spec(bosh_spec: dict[str, ty.Any], input_keys, names_subset= ) if names_subset: - raise RuntimeError(f"{names_subset} are not in the zenodo output spec") + raise RuntimeError(f"{names_subset} are not in the zenodo output definition") return fields diff --git a/pydra/design/python.py b/pydra/design/python.py index afd1a03cd7..75b30c9107 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -21,7 +21,7 @@ @attrs.define class arg(Arg): - """Argument of a Python task spec + """Argument of a Python task definition Parameters ---------- @@ -58,7 +58,7 @@ class arg(Arg): @attrs.define class out(Out): - """Output of a Python task spec + """Output of a Python task definition Parameters ---------- diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 82c24c8fd4..00d233846d 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -27,8 +27,8 @@ def func(a: int) -> float: python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [python.out(name="out", type=float)] - spec = SampleDef(a=1) - result = spec() + definition = SampleDef(a=1) + result = definition() assert result.output.out == 2.0 with pytest.raises(TypeError): SampleDef(a=1.5) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 32c84d5a63..25d89f3e17 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -23,7 +23,7 @@ @attrs.define class arg(Arg): - """Argument of a workflow task spec + """Argument of a workflow task definition Parameters ---------- @@ -63,7 +63,7 @@ class arg(Arg): @attrs.define class out(Out): - """Output of a workflow task spec + """Output of a workflow task definition Parameters ---------- diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 904bbf9b9e..33dcb885c0 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -78,13 +78,13 @@ class Task: _references = None # List of references for a task name: str - spec: TaskDef + definition: TaskDef _inputs: dict[str, ty.Any] | None = None def 
__init__( self, - spec, + definition, name: str | None = None, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, @@ -137,12 +137,12 @@ def __init__( if Task._etelemetry_version_data is None: Task._etelemetry_version_data = check_latest_version() - self.spec = spec + self.definition = definition self.name = name self.input_names = [ field.name - for field in attr.fields(type(self.spec)) + for field in attr.fields(type(self.definition)) if field.name not in ["_func", "_graph_checksums"] ] @@ -159,11 +159,11 @@ def __init__( raise ValueError(f"Unknown input set {inputs!r}") inputs = self._input_sets[inputs] - self.spec = attr.evolve(self.spec, **inputs) + self.definition = attr.evolve(self.definition, **inputs) # checking if metadata is set properly - self.spec._check_resolved() - self.spec._check_rules() + self.definition._check_resolved() + self.definition._check_rules() self._output = {} self._result = {} # flag that says if node finished all jobs @@ -195,11 +195,11 @@ def __str__(self): def __getstate__(self): state = self.__dict__.copy() - state["spec"] = cp.dumps(state["spec"]) + state["definition"] = cp.dumps(state["definition"]) return state def __setstate__(self, state): - state["spec"] = cp.loads(state["spec"]) + state["definition"] = cp.loads(state["definition"]) self.__dict__.update(state) def help(self, returnhelp=False): @@ -225,7 +225,7 @@ def checksum(self): and to create nodes checksums needed for graph checksums (before the tasks have inputs etc.) """ - input_hash = self.spec._hash + input_hash = self.definition._hash self._checksum = create_checksum(self.__class__.__name__, input_hash) return self._checksum @@ -262,7 +262,7 @@ def output_names(self): """Get the names of the outputs from the task's output_spec (not everything has to be generated, see generated_output_names). 
""" - return [f.name for f in attr.fields(self.spec.Outputs)] + return [f.name for f in attr.fields(self.definition.Outputs)] @property def generated_output_names(self): @@ -373,10 +373,12 @@ def inputs(self) -> dict[str, ty.Any]: from pydra.utils.typing import TypeParser self._inputs = { - k: v for k, v in attrs_values(self.spec).items() if not k.startswith("_") + k: v + for k, v in attrs_values(self.definition).items() + if not k.startswith("_") } map_copyfiles = {} - for fld in list_fields(self.spec): + for fld in list_fields(self.definition): name = fld.name value = self._inputs[name] if value is not attr.NOTHING and TypeParser.contains_type( @@ -392,7 +394,9 @@ def inputs(self) -> dict[str, ty.Any]: if value is not copied_value: map_copyfiles[name] = copied_value self._inputs.update( - template_update(self.spec, self.output_dir, map_copyfiles=map_copyfiles) + template_update( + self.definition, self.output_dir, map_copyfiles=map_copyfiles + ) ) return self._inputs @@ -436,7 +440,7 @@ def _run(self, rerun=False, environment=None): try: self.audit.monitor() self._run_task(environment=environment) - result.output = self.spec.Outputs.from_task(self) + result.output = self.definition.Outputs.from_task(self) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) @@ -483,7 +487,7 @@ def get_input_el(self, ind): for inp in set(self.input_names): if f"{self.name}.{inp}" in input_ind: inputs_dict[inp] = self._extract_input_el( - inputs=self.spec, + inputs=self.definition, inp_nm=inp, ind=input_ind[f"{self.name}.{inp}"], ) @@ -506,7 +510,7 @@ def pickle_task(self): def done(self): """Check whether the tasks has been finalized and all outputs are stored.""" # if any of the field is lazy, there is no need to check results - if has_lazy(self.spec): + if has_lazy(self.definition): return False _result = self.result() if self.state: @@ -588,7 +592,7 @@ def result(self, state_index=None, return_inputs=False): self._errored = True if return_inputs is True or return_inputs == "val": inputs_val = { - f"{self.name}.{inp}": getattr(self.spec, inp) + f"{self.name}.{inp}": getattr(self.definition, inp) for inp in self.input_names } return (inputs_val, result) @@ -600,19 +604,19 @@ def result(self, state_index=None, return_inputs=False): def _reset(self): """Reset the connections between inputs and LazyFields.""" - for field in attrs_fields(self.spec): + for field in attrs_fields(self.definition): if field.name in self.inp_lf: - setattr(self.spec, field.name, self.inp_lf[field.name]) + setattr(self.definition, field.name, self.inp_lf[field.name]) if is_workflow(self): for task in self.graph.nodes: task._reset() def _check_for_hash_changes(self): - hash_changes = self.spec._hash_changes() + hash_changes = self.definition._hash_changes() details = "" for changed in hash_changes: - field = getattr(attr.fields(type(self.spec)), changed) - val = getattr(self.spec, changed) + field = getattr(attr.fields(type(self.definition)), changed) + val = getattr(self.definition, changed) field_type = type(val) if issubclass(field.type, FileSet): details += ( @@ -644,8 +648,8 @@ def _check_for_hash_changes(self): "Input values and hashes for '%s' %s node:\n%s\n%s", self.name, type(self).__name__, - self.spec, - self.spec._hashes, + self.definition, + self.definition._hashes, ) SUPPORTED_COPY_MODES = FileSet.CopyMode.any @@ -753,12 +757,12 @@ def checksum(self): (before the tasks have inputs etc.) 
""" # if checksum is called before run the _graph_checksums is not ready - if is_workflow(self) and self.spec._graph_checksums is attr.NOTHING: - self.spec._graph_checksums = { + if is_workflow(self) and self.definition._graph_checksums is attr.NOTHING: + self.definition._graph_checksums = { nd.name: nd.checksum for nd in self.graph_sorted } - input_hash = self.spec.hash + input_hash = self.definition.hash if not self.state: self._checksum = create_checksum( self.__class__.__name__, self._checksum_wf(input_hash) @@ -1037,7 +1041,7 @@ async def _run_task(self, submitter, rerun=False, environment=None): # logger.info("Added %s to %s", self.output_spec, self) def _collect_outputs(self): - output_klass = self.spec.Outputs + output_klass = self.definition.Outputs output = output_klass( **{f.name: attr.NOTHING for f in attr.fields(output_klass)} ) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index edf292ed9a..3f2de60486 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -25,9 +25,11 @@ PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" -def attrs_fields(spec, exclude_names=()) -> list[attrs.Attribute]: - """Get the fields of a spec, excluding some names.""" - return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] +def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]: + """Get the fields of a definition, excluding some names.""" + return [ + field for field in definition.__attrs_attrs__ if field.name not in exclude_names + ] def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: @@ -35,22 +37,22 @@ def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: return attrs.asdict(obj, recurse=False, **kwargs) -def list_fields(spec: "type[TaskDef] | TaskDef") -> list["Field"]: +def list_fields(definition: "type[TaskDef] | TaskDef") -> list["Field"]: """List the fields of a task definition""" - if not inspect.isclass(spec): - spec = type(spec) - if not attrs.has(spec): + if not inspect.isclass(definition): + definition = type(definition) + if not attrs.has(definition): return [] return [ f.metadata[PYDRA_ATTR_METADATA] - for f in attrs.fields(spec) + for f in attrs.fields(definition) if PYDRA_ATTR_METADATA in f.metadata ] -def fields_dict(spec: "type[TaskDef] | TaskDef") -> dict[str, "Field"]: - """Returns the fields of a spec in a dictionary""" - return {f.name: f for f in list_fields(spec)} +def fields_dict(definition: "type[TaskDef] | TaskDef") -> dict[str, "Field"]: + """Returns the fields of a definition in a dictionary""" + return {f.name: f for f in list_fields(definition)} # from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile @@ -546,7 +548,7 @@ def parse_format_string(fmtstr): conversion = "(?:!r|!s)" nobrace = "[^{}]*" # Example: 0{pads[hex]}x (capture "pads") - fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?" # Capture keywords in spec + fmtspec = f"{nobrace}(?:{{({identifier}){nobrace}}}{nobrace})?" 
# Capture keywords in definition full_field = f"{{{field_with_lookups}{conversion}?(?::{fmtspec})?}}" all_keywords = re.findall(full_field, fmtstr) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 3705fd599f..46884573d8 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -98,16 +98,16 @@ def copy_fileset(fileset: FileSet): # not sure if this might be useful for Function Task def template_update( - spec, output_dir: Path, map_copyfiles: dict[str, Path] | None = None + definition, output_dir: Path, map_copyfiles: dict[str, Path] | None = None ): """ - Update all templates that are present in the input spec. + Update all templates that are present in the input definition. Should be run when all inputs used in the templates are already set. """ - inputs_dict_st = attrs_values(spec) + inputs_dict_st = attrs_values(definition) if map_copyfiles is not None: inputs_dict_st.update(map_copyfiles) @@ -116,12 +116,12 @@ def template_update( # Collect templated inputs for which all requirements are satisfied. fields_templ = [ field - for field in list_fields(spec) + for field in list_fields(definition) if isinstance(field, shell.outarg) and field.path_template - and getattr(spec, field.name) is not False + and getattr(definition, field.name) is not False and all( - getattr(spec, required_field) is not None + getattr(definition, required_field) is not None for required_field in field.requires ) ] @@ -130,7 +130,7 @@ def template_update( for fld in fields_templ: dict_mod[fld.name] = template_update_single( field=fld, - spec=spec, + definition=definition, input_values=inputs_dict_st, output_dir=output_dir, ) @@ -142,7 +142,7 @@ def template_update( def template_update_single( field, - spec, + definition, input_values: dict[str, ty.Any] = None, output_dir: Path | None = None, spec_type: str = "input", @@ -156,7 +156,7 @@ def template_update_single( from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa if input_values is None: - input_values = attrs_values(spec) + input_values = attrs_values(definition) if spec_type == "input": inp_val_set = input_values[field.name] @@ -182,7 +182,7 @@ def template_update_single( # if input fld is set to False, the fld shouldn't be used (setting NOTHING) return None # inputs_dict[field.name] is True or spec_type is output - value = _template_formatting(field, spec, input_values) + value = _template_formatting(field, definition, input_values) # changing path so it is in the output_dir if output_dir and value is not None: # should be converted to str, it is also used for input fields that should be str @@ -194,7 +194,7 @@ def template_update_single( return None -def _template_formatting(field, spec, input_values): +def _template_formatting(field, definition, input_values): """Formatting the field template based on the values from inputs. 
Taking into account that the field with a template can be a MultiOutputFile and the field values needed in the template can be a list - @@ -219,20 +219,23 @@ def _template_formatting(field, spec, input_values): # if a template is a function it has to be run first with the inputs as the only arg template = field.path_template if callable(template): - template = template(spec) + template = template(definition) # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): formatted = [ - _string_template_formatting(field, t, spec, input_values) for t in template + _string_template_formatting(field, t, definition, input_values) + for t in template ] else: assert isinstance(template, str) - formatted = _string_template_formatting(field, template, spec, input_values) + formatted = _string_template_formatting( + field, template, definition, input_values + ) return formatted -def _string_template_formatting(field, template, spec, input_values): +def _string_template_formatting(field, template, definition, input_values): from pydra.utils.typing import MultiInputObj, MultiOutputFile inp_fields = re.findall(r"{\w+}", template) diff --git a/pydra/engine/helpers_state.py b/pydra/engine/helpers_state.py index 76847079d4..94022e5724 100644 --- a/pydra/engine/helpers_state.py +++ b/pydra/engine/helpers_state.py @@ -637,7 +637,7 @@ def unwrap_splitter( Parameters ---------- splitter: str or list[str] or tuple[str, ...] - the splitter spec to unwrap + the splitter definition to unwrap Returns ------- diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5130296ab2..5c18b27ae9 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -222,7 +222,7 @@ def __call__( """ self._check_rules() task = self.Task( - spec=self, + definition=self, name=name, audit_flags=audit_flags, cache_dir=cache_dir, @@ -375,7 +375,7 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: ) def _check_resolved(self): - """Checks that all the fields in the spec have been resolved""" + """Checks that all the fields in the definition have been resolved""" if has_lazy_values := [n for n, v in attrs_values(self).items() if is_lazy(v)]: raise ValueError( f"Cannot execute {self} because the following fields " @@ -523,7 +523,9 @@ def from_task( continue # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - if isinstance(fld, shell.outarg) and is_set(getattr(task.spec, fld.name)): + if isinstance(fld, shell.outarg) and is_set( + getattr(task.definition, fld.name) + ): resolved_value = task.inputs[fld.name] elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) @@ -785,7 +787,7 @@ def _command_pos_args( # if argstr has a more complex form, with "{input_field}" if "{" in field.argstr and "}" in field.argstr: cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) - cmd_el_str = argstr_formatting(cmd_el_str, self.spec) + cmd_el_str = argstr_formatting(cmd_el_str, self.definition) else: # argstr has a simple form, e.g. 
"-f", or "--f" if value: cmd_el_str = f"{field.argstr} {value}" diff --git a/pydra/engine/task.py b/pydra/engine/task.py index b3377a846b..f5eb253f31 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -70,14 +70,14 @@ class PythonTask(Task): """Wrap a Python callable as a task element.""" - spec: PythonDef + definition: PythonDef def _run_task(self, environment=None): - inputs = attrs_values(self.spec) + inputs = attrs_values(self.definition) del inputs["function"] self.output_ = None - output = self.spec.function(**inputs) - output_names = [f.name for f in attrs.fields(self.spec.Outputs)] + output = self.definition.function(**inputs) + output_names = [f.name for f in attrs.fields(self.definition.Outputs)] if output is None: self.output_ = {nm: None for nm in output_names} elif len(output_names) == 1: @@ -97,11 +97,11 @@ def _run_task(self, environment=None): class ShellTask(Task): """Wrap a shell command as a task element.""" - spec: ShellDef + definition: ShellDef def __init__( self, - spec: ShellDef, + definition: ShellDef, audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cont_dim=None, @@ -142,7 +142,7 @@ def __init__( self.stdout = None self.stderr = None super().__init__( - spec=spec, + definition=definition, name=name, inputs=kwargs, cont_dim=cont_dim, @@ -180,7 +180,9 @@ def get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: return self.bindings def command_args(self, root: Path | None = None) -> list[str]: - return self.spec._command_args(input_updates=self.inputs_mod_root, root=root) + return self.definition._command_args( + input_updates=self.inputs_mod_root, root=root + ) def _run_task(self, environment=None): if environment is None: @@ -193,9 +195,9 @@ def _prepare_bindings(self, root: str): This updates the ``bindings`` attribute of the current task to make files available in an ``Environment``-defined ``root``. """ - for fld in attrs_fields(self.spec): + for fld in attrs_fields(self.definition): if TypeParser.contains_type(FileSet, fld.type): - fileset = getattr(self.spec, fld.name) + fileset = getattr(self.definition, fld.name) copy = fld.copy_mode == FileSet.CopyMode.copy host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") @@ -218,11 +220,14 @@ def resolve_value( """Collect output file if metadata specified.""" from pydra.design import shell - if not self.spec.Outputs._required_fields_satisfied(fld, self.spec): + if not self.definition.Outputs._required_fields_satisfied(fld, self.definition): return None elif isinstance(fld, shell.outarg) and fld.path_template: return template_update_single( - fld, spec=self.spec, output_dir=self.output_dir, spec_type="output" + fld, + definition=self.definition, + output_dir=self.output_dir, + spec_type="output", ) elif fld.callable: callable_ = fld.callable @@ -266,7 +271,7 @@ def generated_output_names(self, stdout: str, stderr: str): TODO: should be in all Output specs? """ # checking the input (if all mandatory fields are provided, etc.) 
- self.spec._check_rules() + self.definition._check_rules() output_names = ["return_code", "stdout", "stderr"] for fld in list_fields(self): # assuming that field should have either default or metadata, but not both @@ -285,9 +290,9 @@ def _command_args_single(self, state_ind=None, index=None): """Get command line arguments for a single state""" input_filepath = self._bosh_invocation_file(state_ind=state_ind, index=index) cmd_list = ( - self.spec.executable + self.definition.executable + [str(self.bosh_file), input_filepath] - + self.spec.args + + self.definition.args + self.bindings ) return cmd_list @@ -295,11 +300,13 @@ def _command_args_single(self, state_ind=None, index=None): def _bosh_invocation_file(self, state_ind=None, index=None): """creating bosh invocation file - json file with inputs values""" input_json = {} - for f in attrs_fields(self.spec, exclude_names=("executable", "args")): + for f in attrs_fields(self.definition, exclude_names=("executable", "args")): if self.state and f"{self.name}.{f.name}" in state_ind: - value = getattr(self.spec, f.name)[state_ind[f"{self.name}.{f.name}"]] + value = getattr(self.definition, f.name)[ + state_ind[f"{self.name}.{f.name}"] + ] else: - value = getattr(self.spec, f.name) + value = getattr(self.definition, f.name) # adding to the json file if specified by the user if value is not attrs.NOTHING and value != "NOTHING": if is_local_file(f): diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index ba2b1af117..cc5635a936 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -43,7 +43,7 @@ def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir) @need_bosh_docker @pytest.mark.flaky(reruns=3) def test_boutiques_spec_1(data_tests_dir): - """testing spec: providing input/output fields names""" + """testing definition: providing input/output fields names""" btask = boutiques.define( zenodo_id="1482743", input_spec_names=["infile", "maskfile"], @@ -56,8 +56,8 @@ def test_boutiques_spec_1(data_tests_dir): assert len(btask.input_spec.fields) == 2 assert btask.input_spec.fields[0][0] == "infile" assert btask.input_spec.fields[1][0] == "maskfile" - assert hasattr(btask.spec, "infile") - assert hasattr(btask.spec, "maskfile") + assert hasattr(btask.definition, "infile") + assert hasattr(btask.definition, "maskfile") assert len(btask.output_spec.fields) == 2 assert btask.output_spec.fields[0][0] == "outfile" @@ -68,7 +68,7 @@ def test_boutiques_spec_1(data_tests_dir): @need_bosh_docker @pytest.mark.flaky(reruns=3) def test_boutiques_spec_2(data_tests_dir): - """testing spec: providing partial input/output fields names""" + """testing definition: providing partial input/output fields names""" btask = boutiques.define( zenodo_id="1482743", input_spec_names=["infile"], output_spec_names=[] )( @@ -79,9 +79,9 @@ def test_boutiques_spec_2(data_tests_dir): fields = attrs_values(btask) assert len(fields) == 1 assert fields[0][0] == "infile" - assert hasattr(btask.spec, "infile") + assert hasattr(btask.definition, "infile") # input doesn't see maskfile - assert not hasattr(btask.spec, "maskfile") + assert not hasattr(btask.definition, "maskfile") assert len(btask.output_spec.fields) == 0 diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index d1ba4d62ff..c11d212a09 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -74,7 +74,7 @@ def test_docker_2a(results_function, 
plugin): args=cmd_args, environment=Docker(image="busybox"), ) - assert docky.spec.executable == "echo" + assert docky.definition.executable == "echo" assert docky.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" res = results_function(docky, plugin) @@ -129,7 +129,7 @@ def test_docker_outputspec_1(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_1(tmp_path): - """a simple customized input spec for docker task""" + """a simple customized input definition for docker task""" filename = str(tmp_path / "file_pydra.txt") with open(filename, "w") as f: f.write("hello from pydra") @@ -160,7 +160,7 @@ def test_docker_inputspec_1(tmp_path): @no_win @need_docker def test_docker_inputspec_1a(tmp_path): - """a simple customized input spec for docker task + """a simple customized input definition for docker task a default value is used """ filename = str(tmp_path / "file_pydra.txt") @@ -192,7 +192,7 @@ def test_docker_inputspec_1a(tmp_path): @no_win @need_docker def test_docker_inputspec_2(plugin, tmp_path): - """a customized input spec with two fields for docker task""" + """a customized input definition with two fields for docker task""" filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") @@ -234,7 +234,7 @@ def test_docker_inputspec_2(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_2a_except(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default, and second doesn't - raises a dataclass exception """ filename_1 = tmp_path / "file_pydra.txt" @@ -270,7 +270,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): file2=filename_2, strip=True, ) - assert docky.spec.file2.fspath == filename_2 + assert docky.definition.file2.fspath == filename_2 res = docky() assert res.output.stdout == "hello from pydra\nhave a nice one" @@ -279,7 +279,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_2a(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default value this is fine even if the second field is not using any defaults """ @@ -407,7 +407,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_state_1(plugin, tmp_path): - """a customised input spec for a docker file with a splitter, + """a customised input definition for a docker file with a splitter, splitter is on files """ filename_1 = tmp_path / "file_pydra.txt" @@ -443,8 +443,8 @@ def test_docker_inputspec_state_1(plugin, tmp_path): @no_win @need_docker def test_docker_inputspec_state_1b(plugin, tmp_path): - """a customised input spec for a docker file with a splitter, - files from the input spec have the same path in the local os and the container, + """a customised input definition for a docker file with a splitter, + files from the input definition have the same path in the local os and the container, so hash is calculated and the test works fine """ file_1 = tmp_path / "file_pydra.txt" @@ -479,7 +479,7 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): @no_win @need_docker def test_docker_wf_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks""" + """a customized input definition for workflow with docker tasks""" filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") @@ -519,7 +519,7 @@ def Workflow(cmd, 
file): @no_win @need_docker def test_docker_wf_state_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks that has a state""" + """a customized input definition for workflow with docker tasks that has a state""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -564,7 +564,7 @@ def Workflow(cmd, file): @no_win @need_docker def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with docker tasks with states""" + """a customized input definition for workflow with docker tasks with states""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index c0973379c6..ee04791337 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -394,11 +394,11 @@ class MyCommand(ShellTask): task = MyCommand(in_file=filename) assert task.cmdline == f"my {filename}" - task.spec.optional = True + task.definition.optional = True assert task.cmdline == f"my {filename} --opt {task.output_dir / 'file.out'}" - task.spec.optional = False + task.definition.optional = False assert task.cmdline == f"my {filename}" - task.spec.optional = "custom-file-out.txt" + task.definition.optional = "custom-file-out.txt" assert task.cmdline == f"my {filename} --opt custom-file-out.txt" diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index a93492a0ff..c51ead7829 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -48,13 +48,13 @@ class Outputs(ShellOutputs): def test_interface_specs_1(): - """testing if class input/output spec are set properly""" + """testing if class input/output definition are set properly""" task_spec = Interf_1(executable="ls") assert task.Outputs == Interf_1.Outputs def test_interface_specs_2(): - """testing if class input/output spec are overwritten properly by the user's specs""" + """testing if class input/output definition are overwritten properly by the user's specs""" my_input_spec = SpecInfo( name="Input", fields=[("my_inp", ty.Any, {"help_string": "my inp"})], @@ -72,7 +72,7 @@ def test_interface_executable_1(): """testing if the class executable is properly set and used in the command line""" task = Interf_2() assert task.executable == "testing command" - assert task.spec.executable == "testing command" + assert task.definition.executable == "testing command" assert task.cmdline == "testing command" @@ -83,14 +83,14 @@ def test_interface_executable_2(): task = Interf_2(executable="i want a different command") assert task.executable == "testing command" # task.executable stays the same, but input.executable is changed, so the cmd is changed - assert task.spec.executable == "i want a different command" + assert task.definition.executable == "i want a different command" assert task.cmdline == "i want a different command" def test_interface_cmdline_with_spaces(): task = Interf_3(in_file="/path/to/file/with spaces") assert task.executable == "testing command" - assert task.spec.executable == "testing command" + assert task.definition.executable == "testing command" assert task.cmdline == "testing command '/path/to/file/with spaces'" diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index d03dbd466f..e5507a24da 100644 --- 
a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -79,7 +79,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): # separate command into exec + args shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path - assert shelly.spec.executable == "echo" + assert shelly.definition.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) res = results_function(shelly, plugin) @@ -96,7 +96,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): # separate command into exec + args shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path - assert shelly.spec.executable == "echo" + assert shelly.definition.executable == "echo" assert shelly.cmdline == "echo pydra" res = results_function(shelly, plugin) @@ -273,7 +273,7 @@ def test_wf_shell_cmd_1(plugin, tmp_path): assert "_task.pklz" in res.output.out -# customised input spec +# customised input definition @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -308,8 +308,8 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec - assert shelly.spec.args == cmd_args + assert shelly.definition.executable == cmd_exec + assert shelly.definition.args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" res = results_function(shelly, plugin) @@ -357,8 +357,8 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec - assert shelly.spec.args == cmd_args + assert shelly.definition.executable == cmd_exec + assert shelly.definition.args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO from pydra" @@ -396,7 +396,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ -405,7 +405,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input spec (no attr.ib) + using shorter syntax for input definition (no attr.ib) """ cmd_exec = "echo" hello = "HELLO" @@ -429,7 +429,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ -463,9 +463,9 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): shelly = ShellTask( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - shelly.spec.text = hello + shelly.definition.text = hello - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo HELLO" res = results_function(shelly, plugin) assert res.output.stdout == "HELLO\n" @@ 
-531,7 +531,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo" res = results_function(shelly, plugin) assert res.output.stdout == "\n" @@ -561,7 +561,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo Hello" res = results_function(shelly, plugin) @@ -571,7 +571,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input spec (no attr.ib) + using shorter syntax for input definition (no attr.ib) """ cmd_exec = "echo" my_input_spec = SpecInfo( @@ -587,7 +587,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo Hello" res = results_function(shelly, plugin) @@ -618,7 +618,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "echo Hi" res = results_function(shelly, plugin) @@ -729,7 +729,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "ls -t" results_function(shelly, plugin) @@ -826,7 +826,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "ls -l -t" results_function(shelly, plugin) @@ -914,8 +914,8 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): input_spec=my_input_spec, cache_dir=tmp_path, ) - shelly.spec.opt_l = cmd_l - assert shelly.spec.executable == cmd_exec + shelly.definition.opt_l = cmd_l + assert shelly.definition.executable == cmd_exec assert shelly.cmdline == "ls -l -t" results_function(shelly, plugin) @@ -1506,7 +1506,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): cache_dir=tmp_path, ) - assert shelly.spec.executable == cmd_exec + assert shelly.definition.executable == cmd_exec res = results_function(shelly, plugin) assert res.output.stdout == "hello from boston" @@ -1591,7 +1591,7 @@ def test_shell_cmd_inputspec_11(tmp_path): wf = Workflow(name="wf", input_spec=["inputFiles"], inputFiles=["test1", "test2"]) - task.spec.inputFiles = wf.lzin.inputFiles + task.definition.inputFiles = wf.lzin.inputFiles wf.add(task) wf.set_output([("out", wf.echoMultiple.lzout.outputFiles)]) @@ -1705,8 +1705,8 @@ def test_shell_cmd_inputspec_with_iterable(): task = ShellTask(name="test", input_spec=input_spec, executable="test") 
for iterable_type in (list, tuple): - task.spec.iterable_1 = iterable_type(range(3)) - task.spec.iterable_2 = iterable_type(["bar", "foo"]) + task.definition.iterable_1 = iterable_type(range(3)) + task.definition.iterable_2 = iterable_type(["bar", "foo"]) assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" @@ -2653,7 +2653,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): assert all([file.fspath.exists() for file in res.output.cp_file]) -# customised output spec +# customised output definition @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -2879,7 +2879,7 @@ def gather_output(executable, output_dir, ble): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): """ - Customised output spec defined as a class, + Customised output definition defined as a class, using a static function to collect output files. """ @@ -3264,7 +3264,7 @@ def get_lowest_directory(directory_path): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) def test_shell_cmd_outputspec_8d(tmp_path, plugin, results_function): """ - customised output_spec, adding Directory to the output named by input spec + customised output_spec, adding Directory to the output named by input definition """ # For /tmp/some_dict/test this function returns "/test" @@ -3446,8 +3446,8 @@ def test_shell_cmd_inputspec_outputspec_1(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.file2 = "new_file_2.txt" + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.file2 = "new_file_2.txt" res = shelly() assert res.output.stdout == "" @@ -3500,7 +3500,7 @@ def test_shell_cmd_inputspec_outputspec_1a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" + shelly.definition.file1 = "new_file_1.txt" res = shelly() assert res.output.stdout == "" @@ -3561,8 +3561,8 @@ def test_shell_cmd_inputspec_outputspec_2(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.file2 = "new_file_2.txt" + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.file2 = "new_file_2.txt" # all fields from output_spec should be in output_names and generated_output_names assert ( shelly.output_names @@ -3628,7 +3628,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" + shelly.definition.file1 = "new_file_1.txt" # generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", @@ -3700,9 +3700,9 @@ def test_shell_cmd_inputspec_outputspec_3(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.file2 = "new_file_2.txt" - shelly.spec.additional_inp = 2 + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.file2 = "new_file_2.txt" + shelly.definition.additional_inp = 2 res = shelly() assert res.output.stdout == "" @@ -3761,8 +3761,8 @@ def test_shell_cmd_inputspec_outputspec_3a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.file2 = "new_file_2.txt" + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.file2 = "new_file_2.txt" # generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ 
"return_code", @@ -3825,8 +3825,8 @@ def test_shell_cmd_inputspec_outputspec_4(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.additional_inp = 2 + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.additional_inp = 2 # generated_output_names should be the same as output_names assert ( shelly.output_names @@ -3880,9 +3880,9 @@ def test_shell_cmd_inputspec_outputspec_4a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" + shelly.definition.file1 = "new_file_1.txt" # the value is not in the list from requires - shelly.spec.additional_inp = 1 + shelly.definition.additional_inp = 1 res = shelly() assert res.output.stdout == "" @@ -3935,8 +3935,8 @@ def test_shell_cmd_inputspec_outputspec_5(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.additional_inp_A = 2 + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.additional_inp_A = 2 res = shelly() assert res.output.stdout == "" @@ -3989,8 +3989,8 @@ def test_shell_cmd_inputspec_outputspec_5a(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" - shelly.spec.additional_inp_B = 2 + shelly.definition.file1 = "new_file_1.txt" + shelly.definition.additional_inp_B = 2 res = shelly() assert res.output.stdout == "" @@ -4043,7 +4043,7 @@ def test_shell_cmd_inputspec_outputspec_5b(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" + shelly.definition.file1 = "new_file_1.txt" res = shelly() assert res.output.stdout == "" @@ -4092,7 +4092,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): input_spec=my_input_spec, output_spec=my_output_spec, ) - shelly.spec.file1 = "new_file_1.txt" + shelly.definition.file1 = "new_file_1.txt" with pytest.raises(Exception, match="requires field can be"): shelly() @@ -4339,7 +4339,7 @@ def change_name(file): name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec ) out_file = shelly.output_dir / "test_brain.nii.gz" - assert shelly.spec.executable == "bet" + assert shelly.definition.executable == "bet" assert shelly.cmdline == f"bet {in_file} {out_file}" # res = shelly(plugin="cf") diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index e3b662af40..cd491670fb 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -163,7 +163,7 @@ def test_shell_cmd_inputs_2a(): inpB="inpNone2", input_spec=my_input_spec, ) - # position taken from the order in input spec + # position taken from the order in input definition assert shelly.cmdline == "executable inpNone1 inpNone2" @@ -718,7 +718,7 @@ def test_shell_cmd_inputs_not_given_1(): ) shelly = ShellTask(name="shelly", executable="executable", input_spec=my_input_spec) - shelly.spec.arg2 = "argument2" + shelly.definition.arg2 = "argument2" assert shelly.cmdline == "executable --arg2 argument2" @@ -922,7 +922,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): def test_shell_cmd_inputs_template_3a(): """additional inputs with output_file_template and an additional read-only fields that combine two outputs together in the command line - testing a different order within the input spec + testing a different order within the input definition """ my_input_spec = SpecInfo( name="Input", @@ -1746,7 +1746,7 @@ def test_shell_cmd_inputs_template_requires_1(): assert 
"--tpl" not in shelly.cmdline # When requirements are met. - shelly.spec.with_tpl = True + shelly.definition.with_tpl = True assert "tpl.in.file" in shelly.cmdline @@ -2191,55 +2191,55 @@ class Outputs(ShellOutputs): def test_task_inputs_mandatory_with_xOR_one_mandatory_is_OK(): - """input spec with mandatory inputs""" + """input definition with mandatory inputs""" task = SimpleTaskXor() - task.spec.input_1 = "Input1" - task.spec.input_2 = attr.NOTHING - task.spec.check_fields_input_spec() + task.definition.input_1 = "Input1" + task.definition.input_2 = attr.NOTHING + task.definition.check_fields_input_spec() def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): - """input spec with mandatory inputs""" + """input definition with mandatory inputs""" task = SimpleTaskXor() - task.spec.input_1 = attr.NOTHING - task.spec.input_2 = attr.NOTHING - task.spec.input_3 = True - task.spec.check_fields_input_spec() + task.definition.input_1 = attr.NOTHING + task.definition.input_2 = attr.NOTHING + task.definition.input_3 = True + task.definition.check_fields_input_spec() def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): - """input spec with mandatory inputs""" + """input definition with mandatory inputs""" task = SimpleTaskXor() - task.spec.input_1 = attr.NOTHING - task.spec.input_2 = attr.NOTHING + task.definition.input_1 = attr.NOTHING + task.definition.input_2 = attr.NOTHING with pytest.raises(Exception) as excinfo: - task.spec.check_fields_input_spec() + task.definition.check_fields_input_spec() assert "input_1 is mandatory" in str(excinfo.value) assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) assert excinfo.type is AttributeError def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): - """input spec with mandatory inputs""" + """input definition with mandatory inputs""" task = SimpleTaskXor() - task.spec.input_1 = "Input1" - task.spec.input_2 = True + task.definition.input_1 = "Input1" + task.definition.input_2 = True with pytest.raises(Exception) as excinfo: - task.spec.check_fields_input_spec() + task.definition.check_fields_input_spec() assert "input_1 is mutually exclusive with ['input_2']" in str(excinfo.value) assert excinfo.type is AttributeError def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): - """input spec with mandatory inputs""" + """input definition with mandatory inputs""" task = SimpleTaskXor() - task.spec.input_1 = "Input1" - task.spec.input_2 = True - task.spec.input_3 = False + task.definition.input_1 = "Input1" + task.definition.input_2 = True + task.definition.input_3 = False with pytest.raises(Exception) as excinfo: - task.spec.check_fields_input_spec() + task.definition.check_fields_input_spec() assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( excinfo.value ) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 247e36dfb5..1af55843eb 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -195,7 +195,7 @@ def test_singularity_outputspec_1(plugin, tmp_path): @need_singularity def test_singularity_inputspec_1(plugin, tmp_path): - """a simple customized input spec for singularity task""" + """a simple customized input definition for singularity task""" filename = str((tmp_path / "file_pydra.txt")) with open(filename, "w") as f: f.write("hello from pydra") @@ -238,7 +238,7 @@ def test_singularity_inputspec_1(plugin, tmp_path): @need_singularity def 
test_singularity_inputspec_1a(plugin, tmp_path): - """a simple customized input spec for singularity task + """a simple customized input definition for singularity task a default value is used """ filename = str((tmp_path / "file_pydra.txt")) @@ -278,7 +278,7 @@ def test_singularity_inputspec_1a(plugin, tmp_path): @need_singularity def test_singularity_inputspec_2(plugin, tmp_path): - """a customized input spec with two fields for singularity task""" + """a customized input definition with two fields for singularity task""" filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") @@ -336,7 +336,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): @need_singularity def test_singularity_inputspec_2a_except(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default, and second doesn't - raises a dataclass exception """ filename_1 = tmp_path / "file_pydra.txt" @@ -395,7 +395,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): @need_singularity def test_singularity_inputspec_2a(plugin, tmp_path): - """a customized input spec with two fields + """a customized input definition with two fields first one uses a default value, this is fine even if the second field is not using any defaults """ @@ -520,7 +520,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): @need_singularity def test_singularity_inputspec_state_1(tmp_path): - """a customised input spec for a singularity file with a splitter, + """a customised input definition for a singularity file with a splitter, splitter is on files """ filename_1 = tmp_path / "file_pydra.txt" @@ -569,8 +569,8 @@ def test_singularity_inputspec_state_1(tmp_path): @need_singularity def test_singularity_inputspec_state_1b(plugin, tmp_path): - """a customised input spec for a singularity file with a splitter, - files from the input spec have the same path in the local os and the container, + """a customised input definition for a singularity file with a splitter, + files from the input definition have the same path in the local os and the container, so hash is calculated and the test works fine """ file_1 = tmp_path / "file_pydra.txt" @@ -619,7 +619,7 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): @need_singularity def test_singularity_wf_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks""" + """a customized input definition for workflow with singularity tasks""" filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") @@ -671,7 +671,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): @need_singularity def test_singularity_wf_state_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks that has a state""" + """a customized input definition for workflow with singularity tasks that has a state""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: @@ -728,7 +728,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): @need_singularity def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): - """a customized input spec for workflow with singularity tasks with states""" + """a customized input definition for workflow with singularity tasks with states""" file_1 = tmp_path / "file_pydra.txt" file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: diff --git 
a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 17ce176c8a..12c13647a0 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -29,8 +29,8 @@ def test_basespec(): - spec = BaseDef() - assert spec.hash == "0b1d98df22ecd1733562711c205abca2" + definition = BaseDef() + assert definition.hash == "0b1d98df22ecd1733562711c205abca2" def test_runtime(): @@ -50,10 +50,10 @@ def test_result(): def test_shellspec(): with pytest.raises(TypeError): - spec = ShellDef() - spec = ShellDef(executable="ls") # (executable, args) - assert hasattr(spec, "executable") - assert hasattr(spec, "args") + definition = ShellDef() + definition = ShellDef(executable="ls") # (executable, args) + assert hasattr(definition, "executable") + assert hasattr(definition, "args") class NodeTesting: @@ -151,7 +151,7 @@ def test_input_file_hash_1(tmp_path): def test_input_file_hash_2(tmp_path): - """input spec with File types, checking when the checksum changes""" + """input definition with File types, checking when the checksum changes""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -180,7 +180,7 @@ def test_input_file_hash_2(tmp_path): def test_input_file_hash_2a(tmp_path): - """input spec with ty.Union[File, ...] type, checking when the checksum changes""" + """input definition with ty.Union[File, ...] type, checking when the checksum changes""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -215,7 +215,7 @@ def test_input_file_hash_2a(tmp_path): def test_input_file_hash_3(tmp_path): - """input spec with File types, checking when the hash and file_hash change""" + """input definition with File types, checking when the hash and file_hash change""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -269,7 +269,7 @@ def test_input_file_hash_3(tmp_path): def test_input_file_hash_4(tmp_path): - """input spec with nested list, that contain ints and Files, + """input definition with nested list, that contain ints and Files, checking changes in checksums """ file = tmp_path / "in_file_1.txt" @@ -308,7 +308,7 @@ def test_input_file_hash_4(tmp_path): def test_input_file_hash_5(tmp_path): - """input spec with File in nested containers, checking changes in checksums""" + """input definition with File in nested containers, checking changes in checksums""" file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 5a7e0d6311..104992f387 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -56,7 +56,7 @@ def test_numpy(): fft = mark.annotate({"a": np.ndarray, "return": np.ndarray})(np.fft.fft) fft = mark.task(fft)() arr = np.array([[1, 10], [2, 20]]) - fft.spec.a = arr + fft.definition.a = arr res = fft() assert np.allclose(np.fft.fft(arr), res.output.out) @@ -115,7 +115,7 @@ def testfunc( def test_annotated_func_dictreturn(): - """Test mapping from returned dictionary to output spec.""" + """Test mapping from returned dictionary to output definition.""" @python.define @mark.annotate({"return": {"sum": int, "mul": ty.Optional[int]}}) @@ -419,7 +419,7 @@ def testfunc(a: MultiInputObj): def test_annotated_func_multreturn_exception(): """function has two elements in the return statement, - but three element provided in the spec - should raise an error + but three element provided in the definition - should raise an error """ @python.define @@ -1318,7 
+1318,7 @@ def test_shell_cmd(tmpdir): # separate command into exec + args shelly = ShellTask(executable=cmd[0], args=cmd[1:]) - assert shelly.spec.executable == "echo" + assert shelly.definition.executable == "echo" assert shelly.cmdline == " ".join(cmd) res = shelly._run() assert res.output.return_code == 0 diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 9384e1de60..39f005ccec 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -40,7 +40,7 @@ def test_wf_no_input_spec(): - with pytest.raises(ValueError, match='Empty "Inputs" spec'): + with pytest.raises(ValueError, match='Empty "Inputs" definition'): Workflow(name="workflow") @@ -75,14 +75,14 @@ def test_wf_specinfo_input_spec(): def test_wf_dict_input_and_output_spec(): - spec = { + definition = { "a": str, "b": ty.Dict[str, ty.Union[int, bool]], } wf = Workflow( name="workflow", - input_spec=spec, - output_spec=spec, + input_spec=definition, + output_spec=definition, ) wf.add( identity_2flds( diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 8dd09e0849..84d71f70b3 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -944,19 +944,19 @@ def make_spec(self, cmd=None, arg=None): psij.JobDef PSI/J job specification. """ - spec = self.psij.JobDef() - spec.executable = cmd - spec.arguments = arg + definition = self.psij.JobDef() + definition.executable = cmd + definition.arguments = arg - return spec + return definition - def make_job(self, spec, attributes): + def make_job(self, definition, attributes): """ Create a PSI/J job. Parameters ---------- - spec : psij.JobDef + definition : psij.JobDef PSI/J job specification. attributes : any Job attributes. @@ -967,7 +967,7 @@ def make_job(self, spec, attributes): PSI/J job. """ job = self.psij.Job() - job.spec = spec + job.definition = definition return job async def exec_psij(self, runnable, rerun=False): @@ -995,7 +995,7 @@ async def exec_psij(self, runnable, rerun=False): with open(file_path, "wb") as file: pickle.dump(runnable._run, file) func_path = absolute_path / "run_pickled.py" - spec = self.make_spec("python", [func_path, file_path]) + definition = self.make_spec("python", [func_path, file_path]) else: # it could be tuple that includes pickle files with tasks and inputs cache_dir = runnable[-1].cache_dir file_path_1 = cache_dir / "taskmain.pkl" @@ -1006,7 +1006,7 @@ async def exec_psij(self, runnable, rerun=False): with open(file_path_2, "wb") as file: pickle.dump(ind, file) func_path = absolute_path / "run_pickled.py" - spec = self.make_spec( + definition = self.make_spec( "python", [ func_path, @@ -1016,20 +1016,20 @@ async def exec_psij(self, runnable, rerun=False): ) if rerun: - spec.arguments.append("--rerun") + definition.arguments.append("--rerun") - spec.stdout_path = cache_dir / "demo.stdout" - spec.stderr_path = cache_dir / "demo.stderr" + definition.stdout_path = cache_dir / "demo.stdout" + definition.stderr_path = cache_dir / "demo.stderr" - job = self.make_job(spec, None) + job = self.make_job(definition, None) jex.submit(job) job.wait() - if spec.stderr_path.stat().st_size > 0: - with open(spec.stderr_path, "r") as stderr_file: + if definition.stderr_path.stat().st_size > 0: + with open(definition.stderr_path, "r") as stderr_file: stderr_contents = stderr_file.read() raise Exception( - f"stderr_path '{spec.stderr_path}' is not empty. Contents:\n{stderr_contents}" + f"stderr_path '{definition.stderr_path}' is not empty. 
Contents:\n{stderr_contents}" ) return diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py index ebe6835311..1851d526b5 100644 --- a/pydra/engine/workflow/base.py +++ b/pydra/engine/workflow/base.py @@ -37,19 +37,23 @@ class Workflow(ty.Generic[WorkflowOutputsType]): @classmethod def construct( cls, - spec: TaskDef[WorkflowOutputsType], + definition: TaskDef[WorkflowOutputsType], ) -> Self: """Construct a workflow from a definition, caching the constructed worklow""" - lazy_inputs = [f for f in list_fields(type(spec)) if f.lazy] + lazy_inputs = [f for f in list_fields(type(definition)) if f.lazy] - # Create a cache key by hashing all the non-lazy input values in the spec + # Create a cache key by hashing all the non-lazy input values in the definition # and use this to store the constructed workflow in case it is reused or nested # and split over within another workflow lazy_input_names = {f.name for f in lazy_inputs} non_lazy_vals = tuple( sorted( - (i for i in attrs_values(spec).items() if i[0] not in lazy_input_names), + ( + i + for i in attrs_values(definition).items() + if i[0] not in lazy_input_names + ), key=itemgetter(0), ) ) @@ -62,14 +66,14 @@ def construct( return cls._constructed[hash_key] # Initialise the outputs of the workflow - outputs = spec.Outputs( - **{f.name: attrs.NOTHING for f in attrs.fields(spec.Outputs)} + outputs = definition.Outputs( + **{f.name: attrs.NOTHING for f in attrs.fields(definition.Outputs)} ) # Initialise the lzin fields - lazy_spec = copy(spec) + lazy_spec = copy(definition) wf = cls.under_construction = Workflow( - name=type(spec).__name__, + name=type(definition).__name__, inputs=lazy_spec, outputs=outputs, ) @@ -98,7 +102,7 @@ def construct( if output_lazy_fields: if not isinstance(output_lazy_fields, (list, tuple)): output_lazy_fields = [output_lazy_fields] - output_fields = list_fields(spec.Outputs) + output_fields = list_fields(definition.Outputs) if len(output_lazy_fields) != len(output_fields): raise ValueError( f"Expected {len(output_fields)} outputs, got " @@ -149,7 +153,7 @@ def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: name = type(task_spec).__name__ if name in self._nodes: raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutputsType](name=name, spec=task_spec, workflow=self) + node = Node[OutputsType](name=name, definition=task_spec, workflow=self) self._nodes[name] = node return node.lzout diff --git a/pydra/engine/workflow/node.py b/pydra/engine/workflow/node.py index 2920e5a07e..7010aa27b3 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/workflow/node.py @@ -99,7 +99,7 @@ def input_values(self) -> tuple[tuple[str, ty.Any]]: def lzout(self) -> OutputType: from pydra.engine.helpers import list_fields - """The output spec of the node populated with lazy fields""" + """The output definition of the node populated with lazy fields""" if self._lzout is not None: return self._lzout lazy_fields = {} @@ -329,16 +329,16 @@ def _checksum_states(self, state_index=None): TODO """ - # if is_workflow(self) and self.spec._graph_checksums is attr.NOTHING: - # self.spec._graph_checksums = { + # if is_workflow(self) and self.definition._graph_checksums is attr.NOTHING: + # self.definition._graph_checksums = { # nd.name: nd.checksum for nd in self.graph_sorted # } if state_index is not None: - inputs_copy = copy(self.spec) + inputs_copy = copy(self.definition) for key, ind in self.state.inputs_ind[state_index].items(): val = self._extract_input_el( - 
inputs=self.spec, inp_nm=key.split(".")[1], ind=ind + inputs=self.definition, inp_nm=key.split(".")[1], ind=ind ) setattr(inputs_copy, key.split(".")[1], val) # setting files_hash again in case it was cleaned by setting specific element @@ -358,7 +358,7 @@ def _checksum_states(self, state_index=None): else: checksum_list = [] if not hasattr(self.state, "inputs_ind"): - self.state.prepare_states(self.spec, cont_dim=self.cont_dim) + self.state.prepare_states(self.definition, cont_dim=self.cont_dim) self.state.prepare_inputs() for ind in range(len(self.state.inputs_ind)): checksum_list.append(self._checksum_states(state_index=ind)) diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 3212771434..e48bdda753 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -53,7 +53,7 @@ class MultiInputObj(list, ty.Generic[T]): # Since we can't create a NewType from a type union, we add a dummy type to the union -# so we can detect the MultiOutput in the input/output spec creation +# so we can detect the MultiOutput in the input/output definition creation class MultiOutputType: pass From 940f0048996a2c6ed40df1c1733286cf94e3f993 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:45:45 +1100 Subject: [PATCH 121/342] fixed mrtrix import --- new-docs/source/tutorial/advanced-execution.ipynb | 14 ++++++++------ new-docs/source/tutorial/getting-started.ipynb | 2 +- pyproject.toml | 1 + 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 7b16a1fe29..72906a98df 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -41,18 +41,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { "ename": "ImportError", - "evalue": "cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' (/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)", + "evalue": "cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'MrGrid' from 'pydra.tasks.mrtrix3' 
(/Users/tclose/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/__init__.py)" + "Cell \u001b[0;32mIn[4], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Auto-generated, do not edit\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2gmwmi_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Gmwmi\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2vis_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Vis\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivettcheck_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FivettCheck\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/fivett2gmwmi_.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m File, Directory \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage_mrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ImageIn, ImageOut, Tracks \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m specs, ShellCommandTask\n\u001b[1;32m 10\u001b[0m input_fields \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Arguments\u001b[39;00m\n\u001b[1;32m 12\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 104\u001b[0m ),\n\u001b[1;32m 105\u001b[0m ]\n\u001b[1;32m 107\u001b[0m Fivett2GmwmiInputSpec \u001b[38;5;241m=\u001b[39m specs\u001b[38;5;241m.\u001b[39mSpecInfo(\n\u001b[1;32m 108\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFivett2GmwmiInput\u001b[39m\u001b[38;5;124m\"\u001b[39m, fields\u001b[38;5;241m=\u001b[39minput_fields, 
bases\u001b[38;5;241m=\u001b[39m(specs\u001b[38;5;241m.\u001b[39mShellSpec,)\n\u001b[1;32m 109\u001b[0m )\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)" ] } ], @@ -61,7 +63,7 @@ "import tempfile\n", "from fileformats.medimage import Nifti\n", "from pydra.engine.submitter import Submitter\n", - "from pydra.tasks.mrtrix3 import MrGrid\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", "# Make directory filled with nifti files\n", "test_dir = Path(tempfile.mkdtemp())\n", @@ -209,7 +211,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Provenance\n", + "## Provenance and auditing\n", "\n", "Work in progress..." ] diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index c87709b7c4..0044c4783d 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -146,7 +146,7 @@ "metadata": {}, "outputs": [], "source": [ - "from pydra.tasks.mrtrix3 import MrGrid\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", "mrgrid = MrGrid(voxel=0.5).split(input=nifti_dir.iterdir())\n", diff --git a/pyproject.toml b/pyproject.toml index a37aafdd1f..f3d468e3b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ doc = [ "pandoc", "numpy", "scipy", + "pydra-mrtrix", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From e15cb352bcd65e7795dc2deec060c810b01c8b14 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:47:40 +1100 Subject: [PATCH 122/342] finished playing with tutorials for now --- new-docs/source/tutorial/workflow.ipynb | 885 ++++++++++++------------ 1 file changed, 434 insertions(+), 451 deletions(-) diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 7d3bd9d79b..32c1192053 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -1,453 +1,436 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Workflows\n", - "\n", - "In Pydra, workflows are DAG of component tasks to be executed on specified inputs.\n", - "Workflow definitions are dataclasses, which interchangeable with Python and shell tasks\n", - "definitions and executed in the same way." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Constructor functions\n", - "\n", - "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", - "a \"constructor\" function that generates the workflow. For example, given two task\n", - "definitions, `Add` and `Mul`." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import workflow, python\n", - "\n", - "# Example python task definitions\n", - "@python.define\n", - "def Add(a, b):\n", - " return a + b\n", - "\n", - "\n", - "@python.define\n", - "def Mul(a, b):\n", - " return a * b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " we can create a simple workflow definition using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling `workflow.add` function." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def BasicWorkflow(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the workflow. The fields of the outptus object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n", - "\n", - "It is also possible to define new tasks to add to the workflow inline the constructor and type the inputs and outputs of the workflow." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import shell\n", - "from fileformats import image, video\n", - "\n", - "@workflow.define\n", - "def ShellWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: image.Png,\n", - " watermark_dims: tuple[int, int] = (10, 10),\n", - ") -> video.Mp4:\n", - "\n", - " add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video,\n", - " watermark=watermark,\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " )\n", - " )\n", - " output_video = workflow.add(\n", - " shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - " )(in_video=add_watermark.out_video, width=1280, height=720)\n", - " ).out_video\n", - "\n", - " return output_video # test implicit detection of output name" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Accessing the workflow object\n", - "\n", - "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs=[\"divided\"])\n", - "def Divide(x, y):\n", - " return x / y\n", - "\n", - "\n", - "@workflow.define(outputs=[\"out1\", \"out2\"])\n", - "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n", - " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", - "\n", - " Args:\n", - " a: An integer input\n", - " b: A float input\n", - "\n", - " Returns:\n", - " out1: The first output\n", - " out2: The second output\n", - " \"\"\"\n", - "\n", - " wf = workflow.this()\n", - "\n", - " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", - " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", - " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", - "\n", - " # Alter one of the inputs to a node after it has been initialised\n", - " wf[\"Mul\"].inputs.y *= 2\n", - "\n", - " return mul.out, divide.divided" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Directly access the workflow being constructed also enables you to set the outputs of the workflow directly" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n", - "def SetOutputsOfWorkflow(a: int, b: float):\n", - " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", - "\n", - " Args:\n", - " a: An integer input\n", - " b: A float input\n", - "\n", - " Returns:\n", - " out1: The first output\n", - " out2: The second output\n", - " \"\"\"\n", - "\n", - " wf = workflow.this()\n", - "\n", - " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", - " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", - " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", - "\n", - " # Alter one of the inputs to a node after it has been initialised\n", - " wf[\"Mul\"].inputs.y *= 2\n", - "\n", - " # Set the outputs of the workflow directly\n", - " wf.outputs.out1 = mul.out\n", - " wf.outputs.out2 = divide.divided" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Splitting/combining task inputs\n", - "\n", - "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. 
This can be achieved by using the `split` and `combine` methods" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define\n", - "def Sum(x: list[float]) -> float:\n", - " return sum(x)\n", - "\n", - "@workflow.define\n", - "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", - " # Multiply over all combinations of the elements of a and b, then combine the results\n", - " # for each a element into a list over each b element\n", - " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", - " # Sume the multiplications across all all b elements for each a element\n", - " sum = workflow.add(Sum(x=mul.out))\n", - " return sum.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The combination step doesn't have to be done on the same step as the split, in which case the splits propagate to downstream nodes" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", - " mul = workflow.add(Mul()).split(x=a, y=b)\n", - " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", - " sum = workflow.add(Sum(x=add.out))\n", - " return sum.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Nested and conditional workflows\n", - "\n", - "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define\n", - "def ConditionalWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: image.Png,\n", - " watermark_dims: tuple[int, int] | None = None,\n", - ") -> video.Mp4:\n", - "\n", - " if watermark_dims is not None:\n", - " add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video,\n", - " watermark=watermark,\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " )\n", - " )\n", - " handbrake_input = add_watermark.out_video\n", - " else:\n", - " handbrake_input = input_video\n", - "\n", - " output_video = workflow.add(\n", - " shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - " )(in_video=handbrake_input, width=1280, height=720)\n", - " ).out_video\n", - "\n", - " return output_video # test implicit detection of output name" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. 
However, you can get around\n", - "this limitation by placing the conditional logic within a nested workflow" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define\n", - "def Subtract(x: float, y: float) -> float:\n", - " return x - y\n", - "\n", - "@workflow.define\n", - "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", - " add = workflow.add(Add(x=a, y=1))\n", - " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", - " if depth > 0:\n", - " out_node = workflow.add(\n", - " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", - " )\n", - " else:\n", - " out_node = add\n", - " return out_node.out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", - "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Type-checking between nodes\n", - "\n", - "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", - "when values or upstream outputs are assigned to task definition inputs.\n", - "\n", - "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", - "However, if they are assigned a type and a value or output from an upstream node conflicts\n", - "with the type, a `TypeError` will be raised at construction time.\n", - "\n", - "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", - "by `Any` or a super-class of the field being assigned to. For example, an input of\n", - "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type,\n", - "because `Png` is a subtype of `File`, where as `fileformats.image.Jpeg` input would fail\n", - "since it is clearly not the intended type.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from fileformats import generic\n", - "\n", - "Mp4Handbrake = shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - ")\n", - "\n", - "\n", - "QuicktimeHandbrake = shell.define(\n", - " \"HandBrakeCLI -i -o \"\n", - " \"--width --height \",\n", - ")\n", - "\n", - "@workflow.define\n", - "def TypeErrorWorkflow(\n", - " input_video: video.Mp4,\n", - " watermark: generic.File,\n", - " watermark_dims: tuple[int, int] = (10, 10),\n", - ") -> video.Mp4:\n", - "\n", - " add_watermark = workflow.add(\n", - " shell.define(\n", - " \"ffmpeg -i -i \"\n", - " \"-filter_complex \"\n", - " )(\n", - " in_video=input_video, # This is OK because in_video is typed Any\n", - " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", - " filter=\"overlay={}:{}\".format(*watermark_dims),\n", - " ),\n", - " name=\"add_watermark\",\n", - " )\n", - "\n", - " try:\n", - " handbrake = workflow.add(\n", - " QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", - " ) # This will raise a TypeError because the input video is an Mp4\n", - " except TypeError:\n", - " handbrake = workflow.add(\n", - " Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", - " ) # The type of the input video is now correct\n", - "\n", - " return handbrake.output_video" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For more 
detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Workflows\n", + "\n", + "In Pydra, workflows are DAGs of component tasks to be executed on specified inputs.\n", + "Workflow definitions are dataclasses, which are interchangeable with Python and shell task\n", + "definitions and are executed in the same way." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Constructor functions\n", + "\n", + "Workflows are typically defined using the `pydra.design.workflow.define` decorator on \n", + "a \"constructor\" function that generates the workflow. For example, given two task\n", + "definitions, `Add` and `Mul`." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import workflow, python\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " we can create a simple workflow definition using `workflow.define` to decorate a function that constructs the workflow. Nodes are added to the workflow being constructed by calling the `workflow.add` function." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def BasicWorkflow(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the workflow. The fields of the outputs object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of the workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n", + "\n", + "It is also possible to define new tasks to add to the workflow inline in the constructor and to type the inputs and outputs of the workflow."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "from fileformats import image, video\n", + "\n", + "@workflow.define\n", + "def ShellWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=add_watermark.out_video, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Splitting/combining task inputs\n", + "\n", + "Sometimes, you might want to perform the same task over a set of input values/files, and then collect the results into a list to perform further processing. This can be achieved by using the `split` and `combine` methods" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Sum(x: list[float]) -> float:\n", + " return sum(x)\n", + "\n", + "@workflow.define\n", + "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", + " # Multiply over all combinations of the elements of a and b, then combine the results\n", + " # for each a element into a list over each b element\n", + " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", + " # Sume the multiplications across all all b elements for each a element\n", + " sum = workflow.add(Sum(x=mul.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The combination step doesn't have to be done on the same step as the split, in which case the splits propagate to downstream nodes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", + " mul = workflow.add(Mul()).split(x=a, y=b)\n", + " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", + " sum = workflow.add(Sum(x=add.out))\n", + " return sum.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more advanced discussion on the intricacies of splitting and combining see [Splitting and combining](../explanation/splitting-combining.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Nested and conditional workflows\n", + "\n", + "One of the most powerful features of Pydra is the ability to use inline Python code to conditionally add/omit nodes to workflow, and alter the parameterisation of the nodes, depending on inputs to the workflow " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define\n", + "def ConditionalWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: image.Png,\n", + " watermark_dims: tuple[int, int] | None = None,\n", + ") -> video.Mp4:\n", + "\n", + " if watermark_dims is not None:\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i 
\"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video,\n", + " watermark=watermark,\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " )\n", + " )\n", + " handbrake_input = add_watermark.out_video\n", + " else:\n", + " handbrake_input = input_video\n", + "\n", + " output_video = workflow.add(\n", + " shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + " )(in_video=handbrake_input, width=1280, height=720)\n", + " ).out_video\n", + "\n", + " return output_video # test implicit detection of output name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that outputs of upstream nodes cannot be used in conditional statements, since these are just placeholders at the time the workflow is being constructed. However, you can get around\n", + "this limitation by placing the conditional logic within a nested workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define\n", + "def Subtract(x: float, y: float) -> float:\n", + " return x - y\n", + "\n", + "@workflow.define\n", + "def RecursiveNestedWorkflow(a: float, depth: int) -> float:\n", + " add = workflow.add(Add(x=a, y=1))\n", + " decrement_depth = workflow.add(Subtract(x=depth, y=1))\n", + " if depth > 0:\n", + " out_node = workflow.add(\n", + " RecursiveNestedWorkflow(a=add.out, depth=decrement_depth.out)\n", + " )\n", + " else:\n", + " out_node = add\n", + " return out_node.out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion of the construction of conditional workflows and \"lazy field\"\n", + "placeholders see [Conditionals and lazy fields](../explanation/conditional-lazy.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type-checking between nodes\n", + "\n", + "Pydra utilizes Python type annotations to implement strong type-checking, which is performed\n", + "when values or upstream outputs are assigned to task definition inputs.\n", + "\n", + "Task input and output fields do not need to be assigned types, since they will default to `typing.Any`.\n", + "However, if they are assigned a type and a value or output from an upstream node conflicts\n", + "with the type, a `TypeError` will be raised at construction time.\n", + "\n", + "Note that the type-checking \"assumes the best\", and will pass if the upstream field is typed\n", + "by `Any` or a super-class of the field being assigned to. 
For example, an input of\n", + "`fileformats.generic.File` passed to a field expecting a `fileformats.image.Png` file type\n", + "will pass, because `Png` is a subtype of `File`, whereas a `fileformats.image.Jpeg` input would fail\n", + "since it is clearly not the intended type.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats import generic\n", + "\n", + "Mp4Handbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "\n", + "QuicktimeHandbrake = shell.define(\n", + " \"HandBrakeCLI -i -o \"\n", + " \"--width --height \",\n", + ")\n", + "\n", + "@workflow.define\n", + "def TypeErrorWorkflow(\n", + " input_video: video.Mp4,\n", + " watermark: generic.File,\n", + " watermark_dims: tuple[int, int] = (10, 10),\n", + ") -> video.Mp4:\n", + "\n", + " add_watermark = workflow.add(\n", + " shell.define(\n", + " \"ffmpeg -i -i \"\n", + " \"-filter_complex \"\n", + " )(\n", + " in_video=input_video, # This is OK because in_video is typed Any\n", + " watermark=watermark, # Type is OK because generic.File is superclass of image.Png\n", + " filter=\"overlay={}:{}\".format(*watermark_dims),\n", + " ),\n", + " name=\"add_watermark\",\n", + " )\n", + "\n", + " try:\n", + " handbrake = workflow.add(\n", + " QuicktimeHandbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # This will raise a TypeError because the input video is an Mp4\n", + " except TypeError:\n", + " handbrake = workflow.add(\n", + " Mp4Handbrake(in_video=add_watermark.out_video, width=1280, height=720),\n", + " ) # The type of the input video is now correct\n", + "\n", + " return handbrake.output_video" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more detailed discussion on Pydra's type-checking see [Type Checking](../explanation/typing.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessing the workflow object\n", + "\n", + "If you need to access the workflow object being constructed from inside the constructor function you can use `workflow.this()`."
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"divided\"])\n", + "def Divide(x, y):\n", + " return x / y\n", + "\n", + "\n", + "@workflow.define(outputs=[\"out1\", \"out2\"])\n", + "def DirectAccesWorkflow(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " return mul.out, divide.divided" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Directly access the workflow being constructed also enables you to set the outputs of the workflow directly" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "@workflow.define(outputs={\"out1\": float, \"out2\": float})\n", + "def SetOutputsOfWorkflow(a: int, b: float):\n", + " \"\"\"A test workflow demonstration a few alternative ways to set and connect nodes\n", + "\n", + " Args:\n", + " a: An integer input\n", + " b: A float input\n", + "\n", + " Returns:\n", + " out1: The first output\n", + " out2: The second output\n", + " \"\"\"\n", + "\n", + " wf = workflow.this()\n", + "\n", + " add = wf.add(Add(x=a, y=b), name=\"addition\")\n", + " mul = wf.add(python.define(Mul, outputs={\"out\": float})(x=add.z, y=b))\n", + " divide = wf.add(Divide(x=wf[\"addition\"].lzout.z, y=mul.out), name=\"division\")\n", + "\n", + " # Alter one of the inputs to a node after it has been initialised\n", + " wf[\"Mul\"].inputs.y *= 2\n", + "\n", + " # Set the outputs of the workflow directly\n", + " wf.outputs.out1 = mul.out\n", + " wf.outputs.out2 = divide.divided" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 } From 96e0b92bbe877014ee8b1402ada4cafae450edcb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 16:51:41 +1100 Subject: [PATCH 123/342] fixed doc dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f3d468e3b4..e8ac80e72c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ doc = [ "pandoc", "numpy", "scipy", - "pydra-mrtrix", + "pydra-mrtrix3", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From 3495b50a9586b29c01685a4a4f804757cac34418 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 18:03:35 +1100 Subject: [PATCH 124/342] download tags to docs build --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6301c4a3ab..fff42f2743 
100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -20,6 +20,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Set up Python uses: actions/setup-python@v5 with: From ec669e1ce03a564d14bf630858bccf48fea72f03 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 31 Dec 2024 18:05:07 +1100 Subject: [PATCH 125/342] more fixes to docs CI --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index fff42f2743..741cc868ba 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -48,6 +48,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Set up Python uses: actions/setup-python@v5 with: From e95e8ff668bc6ff9b5398cc3d16a3a50a14e4481 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 1 Jan 2025 13:26:23 +1100 Subject: [PATCH 126/342] commented out pydra-mrtrix3 dep for docs until new release --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e8ac80e72c..69a79f9b6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ doc = [ "pandoc", "numpy", "scipy", - "pydra-mrtrix3", + # "pydra-mrtrix3", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From b5b95d30e65514f981f0bb661d7ae79cbb9d2bfa Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 1 Jan 2025 13:58:14 +1100 Subject: [PATCH 127/342] combined typing and hashing --- new-docs/source/explanation/hashing-caching.rst | 4 ---- new-docs/source/explanation/typing.rst | 4 ++-- new-docs/source/index.rst | 17 +++++++++-------- .../source/tutorial/advanced-execution.ipynb | 2 +- 4 files changed, 12 insertions(+), 15 deletions(-) delete mode 100644 new-docs/source/explanation/hashing-caching.rst diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst deleted file mode 100644 index d03d4b042c..0000000000 --- a/new-docs/source/explanation/hashing-caching.rst +++ /dev/null @@ -1,4 +0,0 @@ -Caching and hashing -=================== - -Work in progress.... diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst index c543c966cd..a49d304b09 100644 --- a/new-docs/source/explanation/typing.rst +++ b/new-docs/source/explanation/typing.rst @@ -1,4 +1,4 @@ -Type checking -============= +Typing, file-formats and hashing +================================ Work in progress... diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 72318fa64d..a69b8a13fc 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -4,7 +4,7 @@ Pydra ===== Pydra is a lightweight, Python 3.11+ dataflow engine for computational graph construction, -manipulation, and distributed execution. Designed as a successor to created for [Nipype](https://github.com/nipy/nipype), +manipulation, and distributed execution. Designed as a successor to created for Nipype_, Pydra is a general-purpose engine that supports analytics in any scientific domain. Pydra helps build reproducible, scalable, reusable, and fully automated, provenance tracked scientific workflows that combine Python functions and shell commands. @@ -13,11 +13,11 @@ The power of Pydra lies in ease of workflow creation and execution for complex multiparameter map-reduce operations, and the use of global cache. 
Pydra's key features are: -- Modular execution backends (see [Advanced execution](../tutorial/advanced-execution.html)) -- Map-reduce like semantics (see [Splitting and combining](../explanation/splitting-combining.html)) -- Global cache support to reduce recomputation (see [Hashing and caching](../explanation/hashing-caching.html)) -- Support for execution of Tasks in containerized environments (see [Environments](../explanation/environments.html)) -- Strong type-checking and type-hinting support (see [Typing](../explanation/typing.html)) +- Modular execution backends (see `Advanced execution <./tutorial/advanced-execution.html>`__) +- Map-reduce like semantics (see `Splitting and combining <./explanation/splitting-combining.html>`__) +- Global cache support to reduce recomputation (see `Hashing and caching <./explanation/hashing-caching.html>`__) +- Support for execution of Tasks in containerized environments (see `Environments <./explanation/environments.html>`__) +- Strong type-checking and type-hinting support (see `Typing <./explanation/typing.html>`__) See :ref:`Design philosophy` for more an explanation of the design of Pydra. @@ -118,9 +118,8 @@ Indices and tables explanation/design-approach explanation/splitting-combining - explanation/conditional-lazy explanation/typing - explanation/hashing-caching + explanation/conditional-lazy explanation/environments @@ -135,3 +134,5 @@ Indices and tables .. _ANTs: http://stnava.github.io/ANTs/ .. _AFNI: https://afni.nimh.nih.gov/ .. _niworkflows: https://niworkflows.readthedocs.io/en/latest/ +.. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _ diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 72906a98df..9b69208865 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -27,7 +27,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Caching results\n", + "## Caching\n", "\n", "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", "task and the operation to be performed. 
This hash is used to name the output directory for\n", From 1e8177436af0ba0745836dbe897b292586dde73d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 1 Jan 2025 14:02:11 +1100 Subject: [PATCH 128/342] renamed help_string to help --- docs/components.rst | 6 +- docs/input_spec.rst | 10 +- docs/output_spec.rst | 4 +- new-docs/source/tutorial/canonical-form.ipynb | 570 +++++++++--------- new-docs/source/tutorial/python.ipynb | 14 +- new-docs/source/tutorial/shell.ipynb | 82 +-- pydra/design/base.py | 38 +- pydra/design/boutiques.py | 8 +- pydra/design/python.py | 4 +- pydra/design/shell.py | 6 +- pydra/design/tests/test_python.py | 56 +- pydra/design/tests/test_shell.py | 140 +++-- pydra/design/tests/test_workflow.py | 14 +- pydra/design/workflow.py | 4 +- pydra/engine/core.py | 10 +- pydra/engine/specs.py | 6 +- pydra/engine/tests/test_dockertask.py | 34 +- pydra/engine/tests/test_environments.py | 6 +- pydra/engine/tests/test_helpers_file.py | 4 +- pydra/engine/tests/test_nipype1_convert.py | 10 +- pydra/engine/tests/test_shelltask.py | 398 ++++++------ .../engine/tests/test_shelltask_inputspec.py | 220 ++++--- pydra/engine/tests/test_singularity.py | 30 +- pydra/engine/tests/test_task.py | 48 +- pydra/utils/tests/utils.py | 12 +- 25 files changed, 853 insertions(+), 881 deletions(-) diff --git a/docs/components.rst b/docs/components.rst index 46dcacbe37..d35727f2a0 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -86,16 +86,16 @@ Shell Command Tasks name="Input", fields=[ ( "in_file", File, - { "help_string": "input file ...", + { "help": "input file ...", "position": 1, "mandatory": True } ), ( "out_file", str, - { "help_string": "name of output ...", + { "help": "name of output ...", "position": 2, "output_file_template": "{in_file}_br" } ), ( "mask", bool, - { "help_string": "create binary mask", + { "help": "create binary mask", "argstr": "-m", } ) ], bases=(ShellDef,) ) diff --git a/docs/input_spec.rst b/docs/input_spec.rst index bafaa37a82..4e1148c306 100644 --- a/docs/input_spec.rst +++ b/docs/input_spec.rst @@ -16,16 +16,16 @@ Let's start from the previous example: name="Input", fields=[ ( "in_file", File, - { "help_string": "input file ...", + { "help": "input file ...", "position": 1, "mandatory": True } ), ( "out_file", str, - { "help_string": "name of output ...", + { "help": "name of output ...", "position": 2, "output_file_template": "{in_file}_br" } ), ( "mask", bool, - { "help_string": "create binary mask", + { "help": "create binary mask", "argstr": "-m", } ) ], bases=(ShellDef,) ) @@ -109,10 +109,10 @@ There are also special types provided by Pydra: Metadata -------- -In the example we used multiple keys in the metadata dictionary including `help_string`, +In the example we used multiple keys in the metadata dictionary including `help`, `position`, etc. In this section all allowed key will be described: -`help_string` (`str`, mandatory): +`help` (`str`, mandatory): A short description of the input field. 
`mandatory` (`bool`, default: `False`): diff --git a/docs/output_spec.rst b/docs/output_spec.rst index 347b8b1a55..7ade54e2c4 100644 --- a/docs/output_spec.rst +++ b/docs/output_spec.rst @@ -23,7 +23,7 @@ a customized `output_spec` can be used, e.g.: type=File, metadata={ "output_file_template": "{inp1}", - "help_string": "output file", + "help": "output file", "requires": ["inp1", "inp2"] }, ), @@ -58,7 +58,7 @@ Metadata The metadata dictionary for `output_spec` can include: -`help_string` (`str`, mandatory): +`help` (`str`, mandatory): A short description of the input field. The same as in `input_spec`. `mandatory` (`bool`, default: `False`): diff --git a/new-docs/source/tutorial/canonical-form.ipynb b/new-docs/source/tutorial/canonical-form.ipynb index 6a957aa8fa..242e893510 100644 --- a/new-docs/source/tutorial/canonical-form.ipynb +++ b/new-docs/source/tutorial/canonical-form.ipynb @@ -1,287 +1,287 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Canonical (dataclass) task form\n", - "\n", - "Under the hood, all Python, shell and workflow task definitions generated by the\n", - "`pydra.design.*.define` decorators/functions are translated to\n", - "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", - "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", - "in the [Python-tasks](./python.html), [Shell-tasks](./shell.html) and [Workflow](./workflow.html)\n", - "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", - "for linters follow. Therefore, when designing task definitions to be used by third\n", - "parties (e.g. `pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", - "explicit, \"canonical\" dataclass form.\n", - "\n", - "The syntax of the canonical form is close to that used by the\n", - "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", - "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", - "form will be able to be statically type-checked by [MyPy](https://mypy-lang.org/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Python-task definitions\n", - "\n", - "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", - "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", - "called `Outputs`. 
The function to be executed should be a staticmethod called `function`.\n", - "Default values can also be set directly, as with Attrs classes.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "from pydra.engine.specs import PythonDef, PythonOutputs\n", - "from pydra.design import python\n", - "\n", - "\n", - "@python.define\n", - "class CanonicalPythonDef:\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0 # set default value\n", - "\n", - " class Outputs:\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b\n", - "\n", - "pprint(fields_dict(CanonicalPythonDef))\n", - "pprint(fields_dict(CanonicalPythonDef.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To set additional attributes other than the type and default, such as `allowed_values`\n", - "and `validators`, `python.arg` and `python.out` can be used instead." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import attrs.validators\n", - "\n", - "\n", - "@python.define\n", - "class CanonicalPythonDef:\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", - " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", - "\n", - " class Outputs:\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b\n", - "\n", - "pprint(fields_dict(CanonicalPythonDef))\n", - "pprint(fields_dict(CanonicalPythonDef.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to allow static type-checkers to check the type of outputs of tasks added\n", - "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", - "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", - "`define` method implicitly). 
Thus the \"canonical\" is as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "@python.define\n", - "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0 # set default value\n", - "\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Shell-task definitions\n", - "\n", - "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", - "attribute replaces the `function` staticmethod." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from fileformats import generic\n", - "from pydra.design import shell\n", - "from pydra.engine.specs import ShellDef, ShellOutputs\n", - "from pydra.utils.typing import MultiInputObj\n", - "\n", - "\n", - "@shell.define\n", - "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[generic.FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\")\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " class Outputs(ShellOutputs):\n", - "\n", - " @staticmethod\n", - " def get_file_size(out_file: Path) -> int:\n", - " \"\"\"Calculate the file size\"\"\"\n", - " result = os.stat(out_file)\n", - " return result.st_size\n", - "\n", - " out_file: generic.File\n", - " out_file_size: int = shell.out(callable=get_file_size)\n", - "\n", - "\n", - "pprint(fields_dict(CpWithSize))\n", - "pprint(fields_dict(CpWithSize.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workflow definitions\n", - "\n", - "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", - "but with a staticmethod called `constructor` that constructs the workflow." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import python, workflow\n", - "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", - "\n", - "# Example python task definitions\n", - "@python.define\n", - "def Add(a, b):\n", - " return a + b\n", - "\n", - "\n", - "@python.define\n", - "def Mul(a, b):\n", - " return a * b\n", - "\n", - "\n", - "@workflow.define\n", - "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", - "\n", - " @staticmethod\n", - " def a_converter(value):\n", - " if value is None:\n", - " return value\n", - " return float(value)\n", - "\n", - " a: int\n", - " b: float = workflow.arg(\n", - " help_string=\"A float input\",\n", - " converter=a_converter,\n", - " )\n", - "\n", - " @staticmethod\n", - " def constructor(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out\n", - "\n", - " class Outputs(WorkflowOutputs):\n", - " out: float" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Canonical (dataclass) task form\n", + "\n", + "Under the hood, all Python, shell and workflow task definitions generated by the\n", + "`pydra.design.*.define` decorators/functions are translated to\n", + "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", + "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", + "in the [Python-tasks](./python.html), [Shell-tasks](./shell.html) and [Workflow](./workflow.html)\n", + "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", + "for linters follow. Therefore, when designing task definitions to be used by third\n", + "parties (e.g. `pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", + "explicit, \"canonical\" dataclass form.\n", + "\n", + "The syntax of the canonical form is close to that used by the\n", + "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", + "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", + "form will be able to be statically type-checked by [MyPy](https://mypy-lang.org/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python-task definitions\n", + "\n", + "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", + "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", + "called `Outputs`. 
The function to be executed should be a staticmethod called `function`.\n", + "Default values can also be set directly, as with Attrs classes.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "from pydra.engine.specs import PythonDef, PythonOutputs\n", + "from pydra.design import python\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef:\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0 # set default value\n", + "\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To set additional attributes other than the type and default, such as `allowed_values`\n", + "and `validators`, `python.arg` and `python.out` can be used instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import attrs.validators\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef:\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", + " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", + "\n", + " class Outputs:\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In order to allow static type-checkers to check the type of outputs of tasks added\n", + "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", + "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", + "`define` method implicitly). 
Thus the \"canonical\" is as follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0 # set default value\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shell-task definitions\n", + "\n", + "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", + "attribute replaces the `function` staticmethod." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from fileformats import generic\n", + "from pydra.design import shell\n", + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "\n", + "@shell.define\n", + "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[generic.FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " class Outputs(ShellOutputs):\n", + "\n", + " @staticmethod\n", + " def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + " out_file: generic.File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", + "pprint(fields_dict(CpWithSize.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workflow definitions\n", + "\n", + "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", + "but with a staticmethod called `constructor` that constructs the workflow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python, workflow\n", + "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b\n", + "\n", + "\n", + "@workflow.define\n", + "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", + "\n", + " @staticmethod\n", + " def a_converter(value):\n", + " if value is None:\n", + " return value\n", + " return float(value)\n", + "\n", + " a: int\n", + " b: float = workflow.arg(\n", + " help=\"A float input\",\n", + " converter=a_converter,\n", + " )\n", + "\n", + " @staticmethod\n", + " def constructor(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out\n", + "\n", + " class Outputs(WorkflowOutputs):\n", + " out: float" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index 8d9370c523..140500343d 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -119,8 +119,8 @@ "@python.define(\n", " inputs={\"a\": python.arg(allowed_values=[1, 2, 3]), \"b\": python.arg(default=10.0)},\n", " outputs={\n", - " \"c\": python.out(type=float, help_string=\"the sum of the inputs\"),\n", - " \"d\": python.out(type=float, help_string=\"the difference of the inputs\"),\n", + " \"c\": python.out(type=float, help=\"the sum of the inputs\"),\n", + " \"d\": python.out(type=float, help=\"the difference of the inputs\"),\n", " },\n", ")\n", "def AugmentedTaskDef(a, b):\n", @@ -195,11 +195,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help_string='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=EMPTY, help_string='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help_string='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help_string='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help_string='Product of a and b', requires=[], converter=None, validator=None)}\n" + "{'a': arg(name='a', type=, default=EMPTY, help='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'b': arg(name='b', type=, default=EMPTY, help='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", + " 'function': arg(name='function', type=typing.Callable, default=, help='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", + "{'c': out(name='c', 
type=, default=EMPTY, help='Sum of a and b', requires=[], converter=None, validator=None),\n", + " 'd': out(name='d', type=, default=EMPTY, help='Product of a and b', requires=[], converter=None, validator=None)}\n" ] } ], diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/shell.ipynb index c6f4e64afa..3b90c6488d 100644 --- a/new-docs/source/tutorial/shell.ipynb +++ b/new-docs/source/tutorial/shell.ipynb @@ -163,17 +163,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=str | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, 
callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=str | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" ] } ], @@ -249,19 +249,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', 
help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", 
requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'recursive': arg(name='recursive', type=, default=False, help='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", + " 'out_file': 
outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" ] } ], @@ -275,7 +275,7 @@ " \"--tuple-arg \"\n", " ),\n", " inputs={\"recursive\": shell.arg(\n", - " help_string=(\n", + " help=(\n", " \"If source_file designates a directory, cp copies the directory and \"\n", " \"the entire subtree connected at that point.\"\n", " )\n", @@ -365,17 +365,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'recursive': arg(name='recursive', type=, default=False, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help_string='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_file': out(name='out_file', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=None),\n", - " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help_string='', requires=[], converter=None, validator=None, callable=),\n", - " 'return_code': 
out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" + "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'recursive': arg(name='recursive', type=, default=False, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", + " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", + "{'out_file': out(name='out_file', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, callable=None),\n", + " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, callable=),\n", + " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", + " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", + " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" ] } ], @@ -432,8 +432,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, 
copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help_string=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", - "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help_string='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help_string='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help_string=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help_string='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help_string='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" + "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", + "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" ] } ], @@ -444,15 +444,15 @@ "ACommand = shell.define(\n", " \"a-command\",\n", " inputs={\n", - " \"in_file\": shell.arg(type=File, help_string=\"output file\", argstr=\"\", position=-2)\n", + " \"in_file\": shell.arg(type=File, help=\"output file\", argstr=\"\", position=-2)\n", " },\n", " outputs={\n", " \"out_file\": shell.outarg(\n", - " type=File, help_string=\"output file\", argstr=\"\", position=-1\n", + " type=File, help=\"output file\", argstr=\"\", position=-1\n", " ),\n", " \"out_file_size\": {\n", " \"type\": int,\n", - " \"help_string\": \"size of the output directory\",\n", + " \"help\": \"size of the output directory\",\n", " \"callable\": get_file_size,\n", " }\n", " },\n", diff --git a/pydra/design/base.py b/pydra/design/base.py index cbb9d8e5cf..5524cfe93b 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -166,7 +166,7 @@ class Field: from name to field, by default it is None default : Any, optional the default value for the field, by default it is EMPTY - help_string: str, optional + help: str, optional A short description of the input field. requires: str | list[str | list[str] | Requirement], optional The input fields that are required to be provided, along with the optional allowed @@ -186,7 +186,7 @@ class Field: default: ty.Any = attrs.field( default=EMPTY, converter=attrs.Converter(convert_default_value, takes_self=True) ) - help_string: str = "" + help: str = "" requires: list[RequirementSet] = attrs.field( factory=list, converter=requires_converter ) @@ -211,7 +211,7 @@ class Arg(Field): The type of the field, by default it is Any default : Any, optional the default value for the field, by default it is EMPTY - help_string: str + help: str A short description of the input field. allowed_values: list, optional List of allowed values for the field. @@ -253,7 +253,7 @@ class Out(Field): The type of the field, by default it is Any default : Any, optional the default value for the field, by default it is EMPTY - help_string: str, optional + help: str, optional A short description of the input field. requires: list, optional Names of the inputs that are required together with the field. 
@@ -315,8 +315,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: fields_dict[atr_name] = atr if atr_name in type_hints: atr.type = type_hints[atr_name] - if not atr.help_string: - atr.help_string = helps.get(atr_name, "") + if not atr.help: + atr.help = helps.get(atr_name, "") elif atr_name in type_hints: if atr_name in fields_dict: fields_dict[atr_name].type = type_hints[atr_name] @@ -325,13 +325,13 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: name=atr_name, type=type_hints[atr_name], default=atr, - help_string=helps.get(atr_name, ""), + help=helps.get(atr_name, ""), ) if auto_attribs: for atr_name, type_ in type_hints.items(): if atr_name not in list(fields_dict) + ["Task", "Outputs"]: fields_dict[atr_name] = field_type( - name=atr_name, type=type_, help_string=helps.get(atr_name, "") + name=atr_name, type=type_, help=helps.get(atr_name, "") ) return fields_dict @@ -582,18 +582,18 @@ def ensure_field_objects( ) else: arg.name = input_name - if not arg.help_string: - arg.help_string = input_helps.get(input_name, "") + if not arg.help: + arg.help = input_helps.get(input_name, "") elif is_type(arg): inputs[input_name] = arg_type( type=arg, name=input_name, - help_string=input_helps.get(input_name, ""), + help=input_helps.get(input_name, ""), ) elif isinstance(arg, dict): arg_kwds = copy(arg) - if "help_string" not in arg_kwds: - arg_kwds["help_string"] = input_helps.get(input_name, "") + if "help" not in arg_kwds: + arg_kwds["help"] = input_helps.get(input_name, "") inputs[input_name] = arg_type( name=input_name, **arg_kwds, @@ -616,18 +616,18 @@ def ensure_field_objects( ) else: out.name = output_name - if not out.help_string: - out.help_string = output_helps.get(output_name, "") + if not out.help: + out.help = output_helps.get(output_name, "") elif inspect.isclass(out) or ty.get_origin(out): outputs[output_name] = out_type( type=out, name=output_name, - help_string=output_helps.get(output_name, ""), + help=output_helps.get(output_name, ""), ) elif isinstance(out, dict): out_kwds = copy(out) - if "help_string" not in out_kwds: - out_kwds["help_string"] = output_helps.get(output_name, "") + if "help" not in out_kwds: + out_kwds["help"] = output_helps.get(output_name, "") outputs[output_name] = out_type( name=output_name, **out_kwds, @@ -637,7 +637,7 @@ def ensure_field_objects( name=output_name, type=ty.get_type_hints(out).get("return", ty.Any), callable=out, - help_string=re.split(r"\n\s*\n", out.__doc__)[0] if out.__doc__ else "", + help=re.split(r"\n\s*\n", out.__doc__)[0] if out.__doc__ else "", ) else: raise ValueError( diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 410f855341..20bcc3efd0 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -23,7 +23,7 @@ class arg(shell.arg): The type of the field, by default it is Any default : Any, optional the default value for the field, by default it is EMPTY - help_string: str + help: str A short description of the input field. allowed_values: list, optional List of allowed values for the field. @@ -57,7 +57,7 @@ class out(shell.out): The type of the field, by default it is Any default : Any, optional the default value for the field, by default it is EMPTY - help_string: str, optional + help: str, optional A short description of the input field. requires: list, optional Names of the inputs that are required together with the field. 
@@ -178,7 +178,7 @@ def _prepare_input_spec(bosh_spec: dict[str, ty.Any], names_subset=None): arg( name=name, type=tp, - help_string=input.get("description", None) or input["name"], + help=input.get("description", None) or input["name"], mandatory=not input["optional"], argstr=input.get("command-line-flag", None), ) @@ -212,7 +212,7 @@ def _prepare_output_spec(bosh_spec: dict[str, ty.Any], input_keys, names_subset= out( name=name, type=File, - help_string=output.get("description", None) or output["name"], + help=output.get("description", None) or output["name"], mandatory=not output["optional"], output_file_template=path_template, ) diff --git a/pydra/design/python.py b/pydra/design/python.py index 75b30c9107..128c583fe8 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -25,7 +25,7 @@ class arg(Arg): Parameters ---------- - help_string: str + help: str A short description of the input field. default : Any, optional the default value for the argument @@ -67,7 +67,7 @@ class out(Out): from name to field, by default it is None type: type, optional The type of the field, by default it is Any - help_string: str, optional + help: str, optional A short description of the input field. requires: list, optional Names of the inputs that are required together with the field. diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 5e38f9ffa4..9c70882e43 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -41,7 +41,7 @@ class arg(Arg): Parameters ---------- - help_string: str + help: str A short description of the input field. default : Any, optional the default value for the argument @@ -140,7 +140,7 @@ class outarg(Out, arg): Parameters ---------- - help_string: str + help: str A short description of the input field. default : Any, optional the default value for the argument @@ -364,7 +364,7 @@ def make( position=0, default=executable, validator=attrs.validators.min_len(1), - help_string=EXECUTABLE_HELP_STRING, + help=EXECUTABLE_HELP_STRING, ) # Set positions for the remaining inputs that don't have an explicit position diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 00d233846d..47d4347da7 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -61,19 +61,19 @@ def func(a: int) -> float: SampleDef = python.define( func, - inputs={"a": python.arg(help_string="The argument to be doubled")}, - outputs={"b": python.out(help_string="the doubled output", type=Decimal)}, + inputs={"a": python.arg(help="The argument to be doubled")}, + outputs={"b": python.out(help="the doubled output", type=Decimal)}, ) assert issubclass(SampleDef, PythonDef) inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="The argument to be doubled"), + python.arg(name="a", type=int, help="The argument to be doubled"), python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [ - python.out(name="b", type=Decimal, help_string="the doubled output"), + python.out(name="b", type=Decimal, help="the doubled output"), ] outputs = SampleDef.Outputs(b=Decimal(2.0)) assert isinstance(outputs.b, Decimal) @@ -175,8 +175,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="First input to be 
inputted"), - python.arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), python.arg( name="function", type=ty.Callable, @@ -184,8 +184,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help_string="Sum of a and b"), - python.out(name="d", type=float, help_string="product of a and b"), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -210,8 +210,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="First input to be inputted"), - python.arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), python.arg( name="function", type=ty.Callable, @@ -219,8 +219,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help_string="Sum of a and b"), - python.out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -253,8 +253,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="First input to be inputted"), - python.arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), python.arg( name="function", type=ty.Callable, @@ -262,8 +262,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help_string="Sum of a and b"), - python.out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -301,8 +301,8 @@ def function(a, b): inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="First input to be inputted"), - python.arg(name="b", type=float, default=2.0, help_string="Second input"), + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, default=2.0, help="Second input"), python.arg( name="function", type=ty.Callable, @@ -310,8 +310,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help_string="Sum of a and b"), - python.out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1) @@ -353,14 +353,14 @@ def function(a, b): def 
test_interface_with_class_no_auto_attribs(): @python.define(auto_attribs=False) class SampleDef: - a: int = python.arg(help_string="First input to be inputted") - b: float = python.arg(help_string="Second input") + a: int = python.arg(help="First input to be inputted") + b: float = python.arg(help="Second input") x: int class Outputs: - c: float = python.out(help_string="Sum of a and b") - d: float = python.out(help_string="Product of a and b") + c: float = python.out(help="Sum of a and b") + d: float = python.out(help="Product of a and b") y: str @@ -372,8 +372,8 @@ def function(a, b): inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ - python.arg(name="a", type=int, help_string="First input to be inputted"), - python.arg(name="b", type=float, help_string="Second input"), + python.arg(name="a", type=int, help="First input to be inputted"), + python.arg(name="b", type=float, help="Second input"), python.arg( name="function", type=ty.Callable, @@ -381,8 +381,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help_string="Sum of a and b"), - python.out(name="d", type=float, help_string="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1, b=2.0) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 1f35b4e826..316c7db6f3 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -36,7 +36,7 @@ def test_interface_template(): default="cp", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg(name="in_path", type=FsObject, position=1), output, @@ -46,17 +46,17 @@ def test_interface_template(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] intf = Cp(in_path=File.mock("in-path.txt")) @@ -83,7 +83,7 @@ def test_interface_template_w_types_and_path_template_ext(): default="trim-png", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg(name="in_image", type=image.Png, position=1), output, @@ -93,17 +93,17 @@ def test_interface_template_w_types_and_path_template_ext(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] TrimPng(in_image=image.Png.mock()) @@ -123,7 +123,7 @@ def test_interface_template_w_modify(): default="trim-png", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy @@ -138,17 +138,17 @@ def test_interface_template_w_modify(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), 
] TrimPng(image=image.Png.mock()) @@ -181,7 +181,7 @@ def test_interface_template_more_complex(): default="cp", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " @@ -215,17 +215,17 @@ def test_interface_template_more_complex(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) @@ -247,7 +247,7 @@ def test_interface_template_with_overrides_and_optionals(): "--int-arg " "--tuple-arg " ), - inputs={"recursive": shell.arg(help_string=RECURSIVE_HELP)}, + inputs={"recursive": shell.arg(help=RECURSIVE_HELP)}, outputs={ "out_dir": shell.outarg(position=-2), "out_file": shell.outarg(position=-1), @@ -279,7 +279,7 @@ def test_interface_template_with_overrides_and_optionals(): default="cp", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " @@ -289,7 +289,7 @@ def test_interface_template_with_overrides_and_optionals(): argstr="-R", type=bool, default=False, - help_string=RECURSIVE_HELP, + help=RECURSIVE_HELP, position=2, ), shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), @@ -313,17 +313,17 @@ def test_interface_template_with_overrides_and_optionals(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] @@ -354,7 +354,7 @@ def test_interface_template_with_defaults(): default="cp", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " @@ -378,17 +378,17 @@ def test_interface_template_with_defaults(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] Cp(in_fs_objects=[File.sample(), File.sample(seed=1)]) @@ -422,7 +422,7 @@ def test_interface_template_with_type_overrides(): default="cp", type=str | ty.Sequence[str], position=0, - help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " @@ -448,17 +448,17 @@ def test_interface_template_with_type_overrides(): shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] @@ -472,17 +472,17 @@ class Ls(ShellDef["Ls.Outputs"]): executable = "ls" directory: Directory = shell.arg( - help_string="the directory to list the contents of", + help="the directory to list the contents of", argstr="", position=-1, ) hidden: bool = shell.arg( - 
help_string=("display hidden FS objects"), + help=("display hidden FS objects"), argstr="-a", default=False, ) long_format: bool = shell.arg( - help_string=( + help=( "display properties of FS object, such as permissions, size and " "timestamps " ), @@ -490,20 +490,20 @@ class Ls(ShellDef["Ls.Outputs"]): argstr="-l", ) human_readable: bool = shell.arg( - help_string="display file sizes in human readable form", + help="display file sizes in human readable form", argstr="-h", default=False, requires=["long_format"], ) complete_date: bool = shell.arg( - help_string="Show complete date in long format", + help="Show complete date in long format", argstr="-T", default=False, requires=["long_format"], xor=["date_format_str"], ) date_format_str: str | None = shell.arg( - help_string="format string for ", + help="format string for ", argstr="-D", default=None, requires=["long_format"], @@ -513,7 +513,7 @@ class Ls(ShellDef["Ls.Outputs"]): @shell.outputs class Outputs(ShellOutputs): entries: list = shell.out( - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) @@ -523,18 +523,18 @@ class Outputs(ShellOutputs): inputs={ "directory": shell.arg( type=Directory, - help_string="the directory to list the contents of", + help="the directory to list the contents of", argstr="", position=-1, ), "hidden": shell.arg( type=bool, - help_string="display hidden FS objects", + help="display hidden FS objects", argstr="-a", ), "long_format": { # Mix it up with a full dictionary based definition "type": bool, - "help_string": ( + "help": ( "display properties of FS object, such as permissions, size and " "timestamps " ), @@ -542,13 +542,13 @@ class Outputs(ShellOutputs): }, "human_readable": shell.arg( type=bool, - help_string="display file sizes in human readable form", + help="display file sizes in human readable form", argstr="-h", requires=["long_format"], ), "complete_date": shell.arg( type=bool, - help_string="Show complete date in long format", + help="Show complete date in long format", argstr="-T", default=False, requires=["long_format"], @@ -556,7 +556,7 @@ class Outputs(ShellOutputs): ), "date_format_str": shell.arg( type=str | None, - help_string="format string for ", + help="format string for ", argstr="-D", requires=["long_format"], xor=["complete_date"], @@ -565,7 +565,7 @@ class Outputs(ShellOutputs): outputs={ "entries": shell.out( type=list, - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) }, @@ -667,7 +667,7 @@ class Outputs: inputs={ "x": shell.arg( type=File, - help_string="an input file", + help="an input file", argstr="", position=1, ), @@ -675,7 +675,7 @@ class Outputs: outputs={ "y": shell.outarg( type=File, - help_string="path of output file", + help="path of output file", argstr="", path_template="{x}_out", ), @@ -699,11 +699,11 @@ class A: executable = "cp" - x: File = shell.arg(help_string="an input file", argstr="", position=1) + x: File = shell.arg(help="an input file", argstr="", position=1) class Outputs: y: File = shell.outarg( - help_string="the output file", + help="the output file", path_template="{x}_out", argstr="", position=-1, @@ -719,7 +719,7 @@ class Outputs: output = shell.outarg( name="y", type=File, - help_string="the output file", + help="the output file", path_template="{x}_out", argstr="", position=-1, @@ -732,12 +732,12 @@ class Outputs: type=str | ty.Sequence[str], argstr="", position=0, - 
help_string=shell.EXECUTABLE_HELP_STRING, + help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( name="x", type=File, - help_string="an input file", + help="an input file", argstr="", position=1, ), @@ -748,17 +748,17 @@ class Outputs: shell.out( name="return_code", type=int, - help_string=RETURN_CODE_HELP, + help=RETURN_CODE_HELP, ), shell.out( name="stderr", type=str, - help_string=STDERR_HELP, + help=STDERR_HELP, ), shell.out( name="stdout", type=str, - help_string=STDOUT_HELP, + help=STDOUT_HELP, ), ] @@ -770,7 +770,7 @@ def test_shell_output_field_name_dynamic(): inputs={ "x": shell.arg( type=File, - help_string="an input file", + help="an input file", argstr="", position=1, ), @@ -778,7 +778,7 @@ def test_shell_output_field_name_dynamic(): outputs={ "y": shell.outarg( type=File, - help_string="path of output file", + help="path of output file", argstr="", path_template="{x}_out", ), @@ -796,13 +796,11 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmp_path): B = shell.define( name="B", - inputs={ - "y": shell.arg(type=File, help_string="output file", argstr="", position=-1) - }, + inputs={"y": shell.arg(type=File, help="output file", argstr="", position=-1)}, outputs={ "out_file_size": { "type": int, - "help_string": "size of the output directory", + "help": "size of the output directory", "callable": get_file_size, } }, @@ -863,7 +861,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): inputs={ "directory": shell.arg( type=Directory, - help_string="input directory", + help="input directory", argstr="", position=-1, ) @@ -871,7 +869,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): outputs={ "entries": shell.out( type=list, - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) }, @@ -883,7 +881,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): "hidden": shell.arg( type=bool, argstr="-a", - help_string="show hidden files", + help="show hidden files", default=False, ) }, @@ -906,20 +904,18 @@ def test_shell_inputs_outputs_bases_static(tmp_path): class A: executable = "ls" - directory: Directory = shell.arg( - help_string="input directory", argstr="", position=-1 - ) + directory: Directory = shell.arg(help="input directory", argstr="", position=-1) class Outputs: entries: list = shell.out( - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) @shell.define class B(A): hidden: bool = shell.arg( - help_string="show hidden files", + help="show hidden files", argstr="-a", default=False, ) @@ -941,12 +937,12 @@ def test_shell_missing_executable_static(): @shell.define class A: directory: Directory = shell.arg( - help_string="input directory", argstr="", position=-1 + help="input directory", argstr="", position=-1 ) class Outputs: entries: list = shell.out( - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) @@ -961,7 +957,7 @@ def test_shell_missing_executable_dynamic(): inputs={ "directory": shell.arg( type=Directory, - help_string="input directory", + help="input directory", argstr="", position=-1, ), @@ -969,7 +965,7 @@ def test_shell_missing_executable_dynamic(): outputs={ "entries": shell.out( type=list, - help_string="list of entries returned by ls command", + help="list of entries returned by ls command", callable=list_entries, ) }, diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py 
index 10d091db3f..086155d8ba 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -137,7 +137,7 @@ class MyTestWorkflow(WorkflowDef["MyTestWorkflow.Outputs"]): a: int b: float = workflow.arg( - help_string="A float input", + help="A float input", converter=a_converter, ) @@ -159,9 +159,7 @@ class Outputs(WorkflowOutputs): # if this is a good idea or not assert sorted(list_fields(MyTestWorkflow), key=attrgetter("name")) == [ workflow.arg(name="a", type=int), - workflow.arg( - name="b", type=float, help_string="A float input", converter=a_converter - ), + workflow.arg(name="b", type=float, help="A float input", converter=a_converter), workflow.arg(name="constructor", type=ty.Callable, default=constructor), ] assert list_fields(MyTestWorkflow.Outputs) == [ @@ -262,15 +260,15 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: return mul.out, divide.divided assert list_fields(MyTestWorkflow) == [ - workflow.arg(name="a", type=int, help_string="An integer input"), - workflow.arg(name="b", type=float, help_string="A float input"), + workflow.arg(name="a", type=int, help="An integer input"), + workflow.arg(name="b", type=float, help="A float input"), workflow.arg( name="constructor", type=ty.Callable, default=MyTestWorkflow().constructor ), ] assert list_fields(MyTestWorkflow.Outputs) == [ - workflow.out(name="out1", type=float, help_string="The first output"), - workflow.out(name="out2", type=float, help_string="The second output"), + workflow.out(name="out1", type=float, help="The first output"), + workflow.out(name="out2", type=float, help="The second output"), ] workflow_spec = MyTestWorkflow(a=1, b=2.0) wf = Workflow.construct(workflow_spec) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 25d89f3e17..fe700c5cb0 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -27,7 +27,7 @@ class arg(Arg): Parameters ---------- - help_string: str + help: str A short description of the input field. default : Any, optional the default value for the argument @@ -72,7 +72,7 @@ class out(Out): from name to field, by default it is None type: type, optional The type of the field, by default it is Any - help_string: str, optional + help: str, optional A short description of the input field. requires: list, optional Names of the inputs that are required together with the field. 
diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 33dcb885c0..aa42e99e77 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -1020,8 +1020,8 @@ async def _run_task(self, submitter, rerun=False, environment=None): # wf_out_nm, lf = con # task_nm, task_out_nm = lf.name, lf.field # if task_out_nm == "all_": - # help_string = f"all outputs from {task_nm}" - # fields.append((wf_out_nm, dict, {"help_string": help_string})) + # help = f"all outputs from {task_nm}" + # fields.append((wf_out_nm, dict, {"help": help})) # else: # from pydra.utils.typing import TypeParser @@ -1029,14 +1029,14 @@ async def _run_task(self, submitter, rerun=False, environment=None): # # providing proper type and some help string # task_output_spec = getattr(self, task_nm).output_spec # out_fld = attr.fields_dict(task_output_spec)[task_out_nm] - # help_string = ( - # f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" + # help = ( + # f"{out_fld.metadata.get('help', '')} (from {task_nm})" # ) # if TypeParser.get_origin(lf.type) is StateArray: # type_ = TypeParser.get_item_type(lf.type) # else: # type_ = lf.type - # fields.append((wf_out_nm, type_, {"help_string": help_string})) + # fields.append((wf_out_nm, type_, {"help": help})) # self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseDef,)) # logger.info("Added %s to %s", self.output_spec, self) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5c18b27ae9..9d9f273202 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -484,9 +484,9 @@ class WorkflowDef(TaskDef[WorkflowOutputsType]): class ShellOutputs(TaskOutputs): """Output definition of a generic shell process.""" - return_code: int = shell.out(help_string=RETURN_CODE_HELP) - stdout: str = shell.out(help_string=STDOUT_HELP) - stderr: str = shell.out(help_string=STDERR_HELP) + return_code: int = shell.out(help=RETURN_CODE_HELP) + stdout: str = shell.out(help=STDOUT_HELP) + stderr: str = shell.out(help=STDERR_HELP) @classmethod def from_task( diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index c11d212a09..d6893fac5d 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -114,7 +114,7 @@ def test_docker_outputspec_1(plugin, tmp_path): customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings """ - outputs = [shell.out(name="newfile", type=File, help_string="new file")] + outputs = [shell.out(name="newfile", type=File, help="new file")] docky = shell.define("touch newfile_tmp.txt", outputs=outputs)( environment=Docker(image="ubuntu") ) @@ -143,7 +143,7 @@ def test_docker_inputspec_1(tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] @@ -176,7 +176,7 @@ def test_docker_inputspec_1a(tmp_path): default=filename, position=1, argstr="", - help_string="input file", + help="input file", ) ] @@ -209,7 +209,7 @@ def test_docker_inputspec_2(plugin, tmp_path): type=File, position=1, argstr="", - help_string="input file 1", + help="input file 1", ), shell.arg( name="file2", @@ -217,7 +217,7 @@ def test_docker_inputspec_2(plugin, tmp_path): default=filename_2, position=2, argstr="", - help_string="input file 2", + help="input file 2", ), ] docky = shell.define(cmd, inputs=inputs)( @@ -253,7 +253,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): default=filename_1, position=1, argstr="", - help_string="input file 1", + help="input file 1", 
), shell.arg( name="file2", @@ -261,7 +261,7 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): mandatory=True, position=2, argstr="", - help_string="input file 2", + help="input file 2", ), ] @@ -299,7 +299,7 @@ def test_docker_inputspec_2a(plugin, tmp_path): default=filename_1, position=1, argstr="", - help_string="input file 1", + help="input file 1", ), shell.arg( name="file2", @@ -307,7 +307,7 @@ def test_docker_inputspec_2a(plugin, tmp_path): mandatory=True, position=2, argstr="", - help_string="input file 2", + help="input file 2", ), ] @@ -338,7 +338,7 @@ def test_docker_inputspec_3(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", container_path=True, ) ] @@ -375,14 +375,14 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="orig file", + help="orig file", copyfile="copy", ), shell.arg( name="out_file", type=str, output_file_template="{orig_file}", - help_string="output file", + help="output file", ), ] @@ -426,7 +426,7 @@ def test_docker_inputspec_state_1(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] @@ -463,7 +463,7 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] docky = shell.define(cmd, inputs=inputs)( @@ -493,7 +493,7 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] @@ -536,7 +536,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] @@ -581,7 +581,7 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): mandatory=True, position=1, argstr="", - help_string="input file", + help="input file", ) ] diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 85366d6052..d306381c6a 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -178,7 +178,7 @@ def create_shelly_inputfile(tempdir, filename, name, executable): name="file", type=File, position=1, - help_string="files", + help="files", mandatory=True, argstr="", ) @@ -354,14 +354,14 @@ def create_shelly_outputfile(tempdir, filename, name, executable="cp"): name="file_orig", type=File, position=2, - help_string="new file", + help="new file", argstr="", ), shell.arg( name="file_copy", type=str, output_file_template="{file_orig}_copy", - help_string="output file", + help="output file", argstr="", ), ] diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index ee04791337..2d9d97cbd3 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -367,7 +367,7 @@ def test_output_template(tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ), @@ -380,7 +380,7 @@ def test_output_template(tmp_path): "position": 2, "argstr": "--opt", "output_file_template": "{in_file}.out", - "help_string": "optional file output", + "help": "optional file output", }, ), ), diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index c51ead7829..60739bd6e0 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -12,7 +12,7 @@ def 
find_txt(output_dir: Path) -> File: return files[0] -interf_inputs = [shell.arg(name="test", type=ty.Any, help_string="test")] +interf_inputs = [shell.arg(name="test", type=ty.Any, help="test")] interf_outputs = [shell.out(name="test_out", type=File, callable=find_txt)] @@ -26,7 +26,7 @@ class Interf_3(ShellDef["Interf_3.Outputs"]): executable = ["testing", "command"] - in_file: str = shell.arg(help_string="in_file", argstr="{in_file}") + in_file: str = shell.arg(help="in_file", argstr="{in_file}") @shell.outputs class Outputs(ShellOutputs): @@ -37,9 +37,7 @@ class Outputs(ShellOutputs): class TouchInterf(ShellDef["TouchInterf.Outputs"]): """class with customized input and executables""" - new_file: str = shell.outarg( - help_string="new_file", argstr="", path_template="{new_file}" - ) + new_file: str = shell.outarg(help="new_file", argstr="", path_template="{new_file}") executable = "touch" @shell.outputs @@ -57,7 +55,7 @@ def test_interface_specs_2(): """testing if class input/output definition are overwritten properly by the user's specs""" my_input_spec = SpecInfo( name="Input", - fields=[("my_inp", ty.Any, {"help_string": "my inp"})], + fields=[("my_inp", ty.Any, {"help": "my inp"})], bases=(ShellDef,), ) my_output_spec = SpecInfo( diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index e5507a24da..b8ee5494a6 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -292,7 +292,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): "opt_n", attr.ib( type=bool, - metadata={"position": 1, "argstr": "-n", "help_string": "option"}, + metadata={"position": 1, "argstr": "-n", "help": "option"}, ), ) ], @@ -333,14 +333,14 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): "opt_hello", attr.ib( type=str, - metadata={"position": 3, "help_string": "todo", "argstr": ""}, + metadata={"position": 3, "help": "todo", "argstr": ""}, ), ), ( "opt_n", attr.ib( type=bool, - metadata={"position": 1, "help_string": "todo", "argstr": "-n"}, + metadata={"position": 1, "help": "todo", "argstr": "-n"}, ), ), ], @@ -378,7 +378,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): type=str, metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": True, "argstr": "", }, @@ -415,7 +415,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): ( "text", str, - {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, + {"position": 1, "help": "text", "mandatory": True, "argstr": ""}, ) ], bases=(ShellDef,), @@ -449,7 +449,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): type=str, metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": True, "argstr": "", }, @@ -483,7 +483,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): type=str, metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": True, "argstr": "", }, @@ -516,7 +516,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): default=None, metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": False, "argstr": "", }, @@ -549,7 +549,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): attr.ib( type=str, default="Hello", - metadata={"position": 1, "help_string": "text", "argstr": ""}, + metadata={"position": 1, "help": "text", "argstr": ""}, ), ) ], @@ -576,9 +576,7 @@ def test_shell_cmd_inputspec_4a(plugin, 
results_function, tmp_path): cmd_exec = "echo" my_input_spec = SpecInfo( name="Input", - fields=[ - ("text", str, "Hello", {"position": 1, "help_string": "text", "argstr": ""}) - ], + fields=[("text", str, "Hello", {"position": 1, "help": "text", "argstr": ""})], bases=(ShellDef,), ) @@ -606,7 +604,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): attr.ib( type=str, default="Hi", - metadata={"position": 1, "help_string": "text", "argstr": ""}, + metadata={"position": 1, "help": "text", "argstr": ""}, ), ) ], @@ -638,7 +636,7 @@ def test_shell_cmd_inputspec_4c_exception(plugin): default="Hello", metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": True, "argstr": "", }, @@ -668,7 +666,7 @@ def test_shell_cmd_inputspec_4d_exception(plugin): default="Hello", metadata={ "position": 1, - "help_string": "text", + "help": "text", "output_file_template": "exception", "argstr": "", }, @@ -699,7 +697,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): type=bool, metadata={ "position": 1, - "help_string": "opt t", + "help": "opt t", "argstr": "-t", "xor": ["opt_S"], }, @@ -711,7 +709,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): type=bool, metadata={ "position": 2, - "help_string": "opt S", + "help": "opt S", "argstr": "-S", "xor": ["opt_t"], }, @@ -748,7 +746,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): type=bool, metadata={ "position": 1, - "help_string": "opt t", + "help": "opt t", "argstr": "-t", "xor": ["opt_S"], }, @@ -760,7 +758,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): type=bool, metadata={ "position": 2, - "help_string": "opt S", + "help": "opt S", "argstr": "-S", "xor": ["opt_t"], }, @@ -800,7 +798,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): type=bool, metadata={ "position": 2, - "help_string": "opt t", + "help": "opt t", "argstr": "-t", "requires": ["opt_l"], }, @@ -810,7 +808,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): "opt_l", attr.ib( type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, + metadata={"position": 1, "help": "opt l", "argstr": "-l"}, ), ), ], @@ -846,7 +844,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): type=bool, metadata={ "position": 2, - "help_string": "opt t", + "help": "opt t", "argstr": "-t", "requires": ["opt_l"], }, @@ -856,7 +854,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): "opt_l", attr.ib( type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, + metadata={"position": 1, "help": "opt l", "argstr": "-l"}, ), ), ], @@ -888,7 +886,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): type=bool, metadata={ "position": 2, - "help_string": "opt t", + "help": "opt t", "argstr": "-t", "requires": ["opt_l"], }, @@ -898,7 +896,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): "opt_l", attr.ib( type=bool, - metadata={"position": 1, "help_string": "opt l", "argstr": "-l"}, + metadata={"position": 1, "help": "opt l", "argstr": "-l"}, ), ), ], @@ -938,7 +936,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -983,7 +981,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): metadata={ "output_file_template": "{args}", "output_field_name": "out1_changed", - "help_string": "output file", + 
"help": "output file", }, ), ) @@ -1021,7 +1019,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): "newfile", attr.ib( type=str, - metadata={"position": 1, "help_string": "new file", "argstr": ""}, + metadata={"position": 1, "help": "new file", "argstr": ""}, ), ), ( @@ -1030,7 +1028,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{newfile}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1069,7 +1067,7 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{args}.txt", - "help_string": "output file", + "help": "output file", }, ), ) @@ -1107,7 +1105,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): "newfile", attr.ib( type=str, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1117,7 +1115,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): metadata={ "position": 1, "argstr": "-t", - "help_string": "time of modif.", + "help": "time of modif.", }, ), ), @@ -1127,7 +1125,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{newfile}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1164,7 +1162,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): "newfile", attr.ib( type=str, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1174,7 +1172,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): metadata={ "position": 1, "argstr": "-t {time}", - "help_string": "time of modif.", + "help": "time of modif.", }, ), ), @@ -1184,7 +1182,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{newfile}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1225,7 +1223,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1234,7 +1232,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): type=str, metadata={ "output_file_template": "{file_orig}_copy", - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -1278,7 +1276,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1287,7 +1285,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): type=str, metadata={ "output_file_template": "{file_orig}_copy", - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -1325,7 +1323,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1335,7 +1333,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): metadata={ "output_file_template": "{file_orig}_copy", "keep_extension": False, - "help_string": "output file", + "help": 
"output file", "argstr": "", }, ), @@ -1376,7 +1374,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1386,7 +1384,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): metadata={ "output_file_template": "{file_orig}", "keep_extension": False, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -1429,7 +1427,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( @@ -1438,7 +1436,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): type=str, metadata={ "output_file_template": "{file_orig}_copy", - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -1489,7 +1487,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): "position": 1, "argstr": "...", "sep": " ", - "help_string": "list of files", + "help": "list of files", "mandatory": True, }, ), @@ -1537,7 +1535,7 @@ def test_shell_cmd_inputspec_10_err(tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "a file", + "help": "a file", "mandatory": True, }, ), @@ -1560,7 +1558,7 @@ def test_shell_cmd_inputspec_11(tmp_path): type=MultiInputObj[str], metadata={ "argstr": "...", - "help_string": "The list of input image files to be segmented.", + "help": "The list of input image files to be segmented.", }, ), ) @@ -1572,7 +1570,7 @@ def test_shell_cmd_inputspec_11(tmp_path): attr.ib( type=MultiOutputFile, metadata={ - "help_string": "Corrected Output Images: should specify the same number of images as inputVolume, if only one element is given, then it is used as a file pattern where %s is replaced by the imageVolumeType, and %d by the index list location.", + "help": "Corrected Output Images: should specify the same number of images as inputVolume, if only one element is given, then it is used as a file pattern where %s is replaced by the imageVolumeType, and %d by the index list location.", "output_file_template": "{inputFiles}", }, ), @@ -1633,14 +1631,14 @@ def template_function(inputs): "file_orig", attr.ib( type=File, - metadata={"position": 2, "help_string": "new file", "argstr": ""}, + metadata={"position": 2, "help": "new file", "argstr": ""}, ), ), ( "number", attr.ib( type=int, - metadata={"help_string": "a number", "mandatory": True}, + metadata={"help": "a number", "mandatory": True}, ), ), ( @@ -1649,7 +1647,7 @@ def template_function(inputs): type=str, metadata={ "output_file_template": template_function, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -1686,7 +1684,7 @@ def test_shell_cmd_inputspec_with_iterable(): "iterable_1", ty.Iterable[int], { - "help_string": "iterable input 1", + "help": "iterable input 1", "argstr": "--in1", }, ), @@ -1694,7 +1692,7 @@ def test_shell_cmd_inputspec_with_iterable(): "iterable_2", ty.Iterable[str], { - "help_string": "iterable input 2", + "help": "iterable input 2", "argstr": "--in2...", }, ), @@ -1732,7 +1730,7 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "orig file", + "help": "orig file", "mandatory": True, "copyfile": True, }, @@ -1744,7 +1742,7 @@ def 
test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{orig_file}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1794,7 +1792,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "orig file", + "help": "orig file", "mandatory": True, "copyfile": "hardlink", }, @@ -1806,7 +1804,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{orig_file}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1873,7 +1871,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "orig file", + "help": "orig file", "mandatory": True, }, ), @@ -1884,7 +1882,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{orig_file}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -1923,7 +1921,7 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): type=str, metadata={ "position": 1, - "help_string": "text", + "help": "text", "mandatory": True, "argstr": "", }, @@ -1961,7 +1959,7 @@ def test_shell_cmd_inputspec_typeval_1(): "text", attr.ib( type=int, - metadata={"position": 1, "argstr": "", "help_string": "text"}, + metadata={"position": 1, "argstr": "", "help": "text"}, ), ) ], @@ -1980,7 +1978,7 @@ def test_shell_cmd_inputspec_typeval_2(): my_input_spec = SpecInfo( name="Input", - fields=[("text", int, {"position": 1, "argstr": "", "help_string": "text"})], + fields=[("text", int, {"position": 1, "argstr": "", "help": "text"})], bases=(ShellDef,), ) @@ -2000,7 +1998,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): ( "text", str, - {"position": 1, "help_string": "text", "mandatory": True, "argstr": ""}, + {"position": 1, "help": "text", "mandatory": True, "argstr": ""}, ) ], bases=(ShellDef,), @@ -2037,7 +2035,7 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2081,7 +2079,7 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): type=File, metadata={ "position": 1, - "help_string": "files", + "help": "files", "mandatory": True, "argstr": "", }, @@ -2131,7 +2129,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path metadata={ "position": 1, "argstr": "", - "help_string": "orig file", + "help": "orig file", "mandatory": True, "copyfile": "copy", }, @@ -2143,7 +2141,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path type=str, metadata={ "output_file_template": "{orig_file}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -2195,7 +2193,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2242,7 +2240,7 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2290,7 +2288,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, 
), ) @@ -2307,7 +2305,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): type=File, metadata={ "position": 1, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -2320,7 +2318,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): "position": 2, "argstr": "", "output_file_template": "{orig_file}_copy", - "help_string": "output file", + "help": "output file", }, ), ), @@ -2387,7 +2385,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2404,7 +2402,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): type=str, metadata={ "position": 1, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -2417,7 +2415,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): "position": 2, "argstr": "", "output_file_template": "{orig_file}_cp", - "help_string": "output file", + "help": "output file", }, ), ), @@ -2482,7 +2480,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2499,7 +2497,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): type=str, metadata={ "position": 1, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -2512,7 +2510,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): "position": 2, "argstr": "", "output_file_template": "{orig_file}_copy", - "help_string": "output file", + "help": "output file", }, ), ), @@ -2580,7 +2578,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): type=str, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2597,7 +2595,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): type=str, metadata={ "position": 1, - "help_string": "output file", + "help": "output file", "argstr": "", }, ), @@ -2610,7 +2608,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): "position": 2, "argstr": "", "output_file_template": "{orig_file}_copy", - "help_string": "output file", + "help": "output file", }, ), ), @@ -2924,7 +2922,7 @@ def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): type=File, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -2959,7 +2957,7 @@ def test_shell_cmd_outputspec_6a(): ( "out1", File, - {"output_file_template": "{args}", "help_string": "output file"}, + {"output_file_template": "{args}", "help": "output file"}, ) ], bases=(ShellOutputs,), @@ -2994,7 +2992,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): attr.ib( type=File, metadata={ - "help_string": "script file", + "help": "script file", "mandatory": True, "position": 1, "argstr": "", @@ -3009,7 +3007,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): "position": 2, "argstr": "...", "sep": " ", - "help_string": "list of name indices", + "help": "list of name indices", "mandatory": True, }, ), @@ -3027,7 +3025,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): type=MultiOutputFile, metadata={ "output_file_template": "file{files_id}.txt", - "help_string": "output file", + "help": "output file", }, ), ) @@ -3070,7 +3068,7 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): attr.ib( type=File, metadata={ - "help_string": "script file", + "help": "script file", "mandatory": True, "position": 1, "argstr": "", @@ -3085,7 +3083,7 @@ def 
test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): "position": 2, "argstr": "...", "sep": " ", - "help_string": "list of name indices", + "help": "list of name indices", "mandatory": True, }, ), @@ -3103,7 +3101,7 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): type=MultiOutputFile, metadata={ "output_file_template": "file{files_id}.txt", - "help_string": "output file", + "help": "output file", }, ), ) @@ -3155,7 +3153,7 @@ def get_stderr(stderr): type=File, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -3163,7 +3161,7 @@ def get_stderr(stderr): "out_file_index", attr.ib( type=int, - metadata={"help_string": "output file", "callable": get_file_index}, + metadata={"help": "output file", "callable": get_file_index}, ), ), ( @@ -3171,7 +3169,7 @@ def get_stderr(stderr): attr.ib( type=str, metadata={ - "help_string": "The standard error output", + "help": "The standard error output", "callable": get_stderr, }, ), @@ -3203,9 +3201,7 @@ def test_shell_cmd_outputspec_8b_error(): fields=[ ( "out", - attr.ib( - type=int, metadata={"help_string": "output file", "value": "val"} - ), + attr.ib(type=int, metadata={"help": "output file", "value": "val"}), ) ], bases=(ShellOutputs,), @@ -3239,7 +3235,7 @@ def get_lowest_directory(directory_path): type=Directory, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -3282,7 +3278,7 @@ def get_lowest_directory(directory_path): type=str, metadata={ "position": 1, - "help_string": "new directory", + "help": "new directory", "argstr": "", }, ), @@ -3300,7 +3296,7 @@ def get_lowest_directory(directory_path): type=Directory, metadata={ "output_file_template": "{resultsDir}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -3347,7 +3343,7 @@ def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): type=File, metadata={ "output_file_template": "{args}", - "help_string": "output file", + "help": "output file", }, ), ) @@ -3413,12 +3409,12 @@ def test_shell_cmd_inputspec_outputspec_1(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), ], bases=(ShellDef,), @@ -3430,12 +3426,12 @@ def test_shell_cmd_inputspec_outputspec_1(): ( "newfile1", File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, + {"output_file_template": "{file1}", "help": "newfile 1"}, ), ( "newfile2", File, - {"output_file_template": "{file2}", "help_string": "newfile 2"}, + {"output_file_template": "{file2}", "help": "newfile 2"}, ), ], bases=(ShellOutputs,), @@ -3467,12 +3463,12 @@ def test_shell_cmd_inputspec_outputspec_1a(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), ], bases=(ShellDef,), @@ -3484,12 +3480,12 @@ def test_shell_cmd_inputspec_outputspec_1a(): ( "newfile1", File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, + {"output_file_template": "{file1}", "help": "newfile 1"}, ), ( "newfile2", File, - {"output_file_template": "{file2}", "help_string": 
"newfile 2"}, + {"output_file_template": "{file2}", "help": "newfile 2"}, ), ], bases=(ShellOutputs,), @@ -3520,12 +3516,12 @@ def test_shell_cmd_inputspec_outputspec_2(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), ], bases=(ShellDef,), @@ -3539,7 +3535,7 @@ def test_shell_cmd_inputspec_outputspec_2(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1"], }, ), @@ -3548,7 +3544,7 @@ def test_shell_cmd_inputspec_outputspec_2(): File, { "output_file_template": "{file2}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", "file2"], }, ), @@ -3587,12 +3583,12 @@ def test_shell_cmd_inputspec_outputspec_2a(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), ], bases=(ShellDef,), @@ -3606,7 +3602,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1"], }, ), @@ -3615,7 +3611,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): File, { "output_file_template": "{file2}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", "file2"], }, ), @@ -3662,14 +3658,14 @@ def test_shell_cmd_inputspec_outputspec_3(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), - ("additional_inp", int, {"help_string": "additional inp"}), + ("additional_inp", int, {"help": "additional inp"}), ], bases=(ShellDef,), ) @@ -3680,14 +3676,14 @@ def test_shell_cmd_inputspec_outputspec_3(): ( "newfile1", File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, + {"output_file_template": "{file1}", "help": "newfile 1"}, ), ( "newfile2", File, { "output_file_template": "{file2}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", "additional_inp"], }, ), @@ -3723,14 +3719,14 @@ def test_shell_cmd_inputspec_outputspec_3a(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), ( "file2", str, - {"help_string": "2nd creadted file", "argstr": "", "position": 2}, + {"help": "2nd creadted file", "argstr": "", "position": 2}, ), - ("additional_inp", str, {"help_string": "additional inp"}), + ("additional_inp", str, {"help": "additional inp"}), ], bases=(ShellDef,), ) @@ -3741,14 +3737,14 @@ def test_shell_cmd_inputspec_outputspec_3a(): ( "newfile1", File, - {"output_file_template": "{file1}", "help_string": "newfile 1"}, + {"output_file_template": "{file1}", "help": "newfile 1"}, ), ( "newfile2", File, { "output_file_template": "{file2}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", "additional_inp"], }, ), @@ -3797,9 +3793,9 @@ def test_shell_cmd_inputspec_outputspec_4(): ( "file1", str, - {"help_string": "1st 
creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp", int, {"help_string": "additional inp"}), + ("additional_inp", int, {"help": "additional inp"}), ], bases=(ShellDef,), ) @@ -3812,7 +3808,7 @@ def test_shell_cmd_inputspec_outputspec_4(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", ("additional_inp", [2, 3])], }, ) @@ -3852,9 +3848,9 @@ def test_shell_cmd_inputspec_outputspec_4a(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp", int, {"help_string": "additional inp"}), + ("additional_inp", int, {"help": "additional inp"}), ], bases=(ShellDef,), ) @@ -3867,7 +3863,7 @@ def test_shell_cmd_inputspec_outputspec_4a(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", "requires": ["file1", ("additional_inp", [2, 3])], }, ) @@ -3902,10 +3898,10 @@ def test_shell_cmd_inputspec_outputspec_5(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp_A", int, {"help_string": "additional inp A"}), - ("additional_inp_B", str, {"help_string": "additional inp B"}), + ("additional_inp_A", int, {"help": "additional inp A"}), + ("additional_inp_B", str, {"help": "additional inp B"}), ], bases=(ShellDef,), ) @@ -3918,7 +3914,7 @@ def test_shell_cmd_inputspec_outputspec_5(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... "requires": [ ["file1", "additional_inp_A"], @@ -3956,10 +3952,10 @@ def test_shell_cmd_inputspec_outputspec_5a(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), - ("additional_inp_B", int, {"help_string": "additional inp B"}), + ("additional_inp_A", str, {"help": "additional inp A"}), + ("additional_inp_B", int, {"help": "additional inp B"}), ], bases=(ShellDef,), ) @@ -3972,7 +3968,7 @@ def test_shell_cmd_inputspec_outputspec_5a(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... "requires": [ ["file1", "additional_inp_A"], @@ -4010,10 +4006,10 @@ def test_shell_cmd_inputspec_outputspec_5b(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), - ("additional_inp_B", str, {"help_string": "additional inp B"}), + ("additional_inp_A", str, {"help": "additional inp A"}), + ("additional_inp_B", str, {"help": "additional inp B"}), ], bases=(ShellDef,), ) @@ -4026,7 +4022,7 @@ def test_shell_cmd_inputspec_outputspec_5b(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... 
"requires": [ ["file1", "additional_inp_A"], @@ -4063,9 +4059,9 @@ def test_shell_cmd_inputspec_outputspec_6_except(): ( "file1", str, - {"help_string": "1st creadted file", "argstr": "", "position": 1}, + {"help": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), + ("additional_inp_A", str, {"help": "additional inp A"}), ], bases=(ShellDef,), ) @@ -4078,7 +4074,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): File, { "output_file_template": "{file1}", - "help_string": "newfile 1", + "help": "newfile 1", # requires has invalid syntax "requires": [["file1", "additional_inp_A"], "file1"], }, @@ -4130,7 +4126,7 @@ def change_name(file): attr.ib( type=File, metadata={ - "help_string": "input file to skull strip", + "help": "input file to skull strip", "position": 1, "mandatory": True, "argstr": "", @@ -4142,7 +4138,7 @@ def change_name(file): attr.ib( type=str, metadata={ - "help_string": "name of output skull stripped image", + "help": "name of output skull stripped image", "position": 2, "argstr": "", "output_file_template": "{in_file}_brain", @@ -4154,7 +4150,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "create surface outline image", + "help": "create surface outline image", "argstr": "-o", }, ), @@ -4164,7 +4160,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "create binary mask image", + "help": "create binary mask image", "argstr": "-m", }, ), @@ -4173,7 +4169,7 @@ def change_name(file): "skull", attr.ib( type=bool, - metadata={"help_string": "create skull image", "argstr": "-s"}, + metadata={"help": "create skull image", "argstr": "-s"}, ), ), ( @@ -4181,7 +4177,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "Don't generate segmented output", + "help": "Don't generate segmented output", "argstr": "-n", }, ), @@ -4191,7 +4187,7 @@ def change_name(file): attr.ib( type=float, metadata={ - "help_string": "fractional intensity threshold", + "help": "fractional intensity threshold", "argstr": "-f", }, ), @@ -4201,7 +4197,7 @@ def change_name(file): attr.ib( type=float, metadata={ - "help_string": "vertical gradient in fractional intensity threshold (-1, 1)", + "help": "vertical gradient in fractional intensity threshold (-1, 1)", "argstr": "-g", "allowed_values": {"min_val": -1, "max_val": 1}, }, @@ -4209,16 +4205,14 @@ def change_name(file): ), ( "radius", - attr.ib( - type=int, metadata={"argstr": "-r", "help_string": "head radius"} - ), + attr.ib(type=int, metadata={"argstr": "-r", "help": "head radius"}), ), ( "center", attr.ib( type=ty.List[int], metadata={ - "help_string": "center of gravity in voxels", + "help": "center of gravity in voxels", "argstr": "-c", "allowed_values": {"min_value": 0, "max_value": 3}, }, @@ -4230,7 +4224,7 @@ def change_name(file): type=bool, metadata={ "argstr": "-t", - "help_string": "apply thresholding to segmented brain image and mask", + "help": "apply thresholding to segmented brain image and mask", }, ), ), @@ -4240,7 +4234,7 @@ def change_name(file): type=bool, metadata={ "argstr": "-e", - "help_string": "generate a vtk mesh brain surface", + "help": "generate a vtk mesh brain surface", }, ), ), @@ -4249,7 +4243,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "robust brain centre estimation (iterates BET several times)", + "help": "robust brain centre estimation (iterates BET several times)", "argstr": "-R", "xor": _xor_inputs, }, @@ -4260,7 +4254,7 @@ def 
change_name(file): attr.ib( type=bool, metadata={ - "help_string": "improve BET if FOV is very small in Z (by temporarily padding end slices", + "help": "improve BET if FOV is very small in Z (by temporarily padding end slices", "argstr": "-Z", "xor": _xor_inputs, }, @@ -4271,7 +4265,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "eye & optic nerve cleanup (can be useful in SIENA)", + "help": "eye & optic nerve cleanup (can be useful in SIENA)", "argstr": "-S", "xor": _xor_inputs, }, @@ -4282,7 +4276,7 @@ def change_name(file): attr.ib( type=bool, metadata={ - "help_string": "run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", + "help": "run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", "argstr": "-A", "xor": _xor_inputs, }, @@ -4293,7 +4287,7 @@ def change_name(file): attr.ib( type=ty.Union[File, str], metadata={ - "help_string": "as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", + "help": "as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", "argstr": "-A2", "xor": _xor_inputs, }, @@ -4306,7 +4300,7 @@ def change_name(file): metadata={ "argstr": "-F", "xor": _xor_inputs, - "help_string": "apply to 4D fMRI data", + "help": "apply to 4D fMRI data", }, ), ), @@ -4317,16 +4311,16 @@ def change_name(file): metadata={ "argstr": "-B", "xor": _xor_inputs, - "help_string": "bias field and neck cleanup", + "help": "bias field and neck cleanup", }, ), ), - # ("number_classes", int, attr.ib(metadata={"help_string": 'number of tissue-type classes', "argstr": '-n', + # ("number_classes", int, attr.ib(metadata={"help": 'number of tissue-type classes', "argstr": '-n', # "allowed_values": {"min_val": 1, "max_val": 10}})), # ("output_biasfield", bool, - # attr.ib(metadata={"help_string": 'output estimated bias field', "argstr": '-b'})), + # attr.ib(metadata={"help": 'output estimated bias field', "argstr": '-b'})), # ("output_biascorrected", bool, - # attr.ib(metadata={"help_string": 'output restored image (bias-corrected image)', "argstr": '-B'})), + # attr.ib(metadata={"help": 'output restored image (bias-corrected image)', "argstr": '-B'})), ], bases=(ShellDef,), ) @@ -4353,9 +4347,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): fields=[ ( "input", - attr.ib( - type=File, metadata={"argstr": "", "help_string": "input file"} - ), + attr.ib(type=File, metadata={"argstr": "", "help": "input file"}), ), ( "output", @@ -4364,7 +4356,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): metadata={ "argstr": "", "output_file_template": "out.txt", - "help_string": "output file", + "help": "output file", }, ), ), @@ -4376,7 +4368,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): metadata={ "argstr": "--not-used", "output_file_template": "out.txt", - "help_string": "dummy output", + "help": "dummy output", }, ), ), @@ -4404,9 +4396,7 @@ def test_shell_cmd_optional_output_file2(tmp_path): fields=[ ( "input", - attr.ib( - type=File, metadata={"argstr": "", "help_string": "input file"} - ), + attr.ib(type=File, metadata={"argstr": "", "help": "input file"}), ), ( "output", @@ -4416,7 +4406,7 @@ def test_shell_cmd_optional_output_file2(tmp_path): metadata={ "argstr": "", "output_file_template": "out.txt", - "help_string": "dummy output", + "help": "dummy output", }, ), ), @@ -4451,7 +4441,7 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): attr.ib( type=str, metadata={ - 
"help_string": """ + "help": """ base name of the pretend outputs. """, "mandatory": True, @@ -4469,7 +4459,7 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}_1.nii", }, ), @@ -4479,7 +4469,7 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #2", + "help": "fictional output #2", "output_file_template": "{out_name}_2.nii", }, ), @@ -4512,7 +4502,7 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): attr.ib( type=str, metadata={ - "help_string": """ + "help": """ base name of the pretend outputs. """, "mandatory": True, @@ -4531,7 +4521,7 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}_1.nii", }, ), @@ -4541,7 +4531,7 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #2", + "help": "fictional output #2", "output_file_template": "{out_name}_2.nii", }, ), @@ -4578,7 +4568,7 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): attr.ib( type=str, metadata={ - "help_string": """ + "help": """ base name of the pretend outputs. """, "mandatory": True, @@ -4597,7 +4587,7 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}_1.nii", "mandatory": True, }, @@ -4608,7 +4598,7 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #2", + "help": "fictional output #2", "output_file_template": "{out_name}_2.nii", }, ), @@ -4645,7 +4635,7 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): attr.ib( type=str, metadata={ - "help_string": """ + "help": """ base name of the pretend outputs. """, "mandatory": True, @@ -4664,7 +4654,7 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}_1.nii", "mandatory": True, }, @@ -4675,7 +4665,7 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): attr.ib( type=File, metadata={ - "help_string": "fictional output #2", + "help": "fictional output #2", "output_file_template": "{out_name}_2.nii", "mandatory": True, }, @@ -4710,7 +4700,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): attr.ib( type=MultiInputObj, metadata={ - "help_string": """ + "help": """ base name of the pretend outputs. """, "mandatory": True, @@ -4729,7 +4719,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): attr.ib( type=MultiOutputFile, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}", }, ), @@ -4763,7 +4753,7 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): attr.ib( type=MultiInputObj, metadata={ - "help_string": """ + "help": """ base name of the pretend outputs. 
""", "sep": " test_1_real.nii", # hacky way of creating an extra file with that name @@ -4783,7 +4773,7 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): attr.ib( type=MultiOutputFile, metadata={ - "help_string": "fictional output #1", + "help": "fictional output #1", "output_file_template": "{out_name}_real.nii", }, ), @@ -4824,7 +4814,7 @@ def spec_info(formatter): attr.ib( type=str, metadata={ - "help_string": """ + "help": """ just a dummy name """, "mandatory": True, @@ -4836,7 +4826,7 @@ def spec_info(formatter): attr.ib( type=str, metadata={ - "help_string": """ + "help": """ just a dummy name """, "mandatory": True, @@ -4848,7 +4838,7 @@ def spec_info(formatter): attr.ib( type=ty.List, metadata={ - "help_string": """ + "help": """ combines in1 and in2 into a list """, # When providing a formatter all other metadata options are discarded. @@ -4943,7 +4933,7 @@ def spec_info(formatter): attr.ib( type=str, metadata={ - "help_string": "in1", + "help": "in1", }, ), ), @@ -4952,7 +4942,7 @@ def spec_info(formatter): attr.ib( type=str, metadata={ - "help_string": "in2", + "help": "in2", }, ), ), @@ -4961,7 +4951,7 @@ def spec_info(formatter): attr.ib( type=ty.List, metadata={ - "help_string": """ + "help": """ uses in1 """, # When providing a formatter all other metadata options are discarded. @@ -5020,7 +5010,7 @@ def test_shellcommand_error_msg(tmp_path): ( "in1", str, - {"help_string": "a dummy string", "argstr": "", "mandatory": True}, + {"help": "a dummy string", "argstr": "", "mandatory": True}, ), ], bases=(ShellDef,), diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index cd491670fb..78815655ce 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -34,7 +34,7 @@ def test_shell_cmd_inputs_1(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inp1", "argstr": ""}, + metadata={"position": 1, "help": "inp1", "argstr": ""}, ), ) ], @@ -51,9 +51,7 @@ def test_shell_cmd_inputs_1a(): """additional input without provided position""" my_input_spec = SpecInfo( name="Input", - fields=[ - ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})) - ], + fields=[("inpA", attr.ib(type=str, metadata={"help": "inpA", "argstr": ""}))], bases=(ShellDef,), ) @@ -73,7 +71,7 @@ def test_shell_cmd_inputs_1b(): "inpA", attr.ib( type=str, - metadata={"position": -1, "help_string": "inpA", "argstr": ""}, + metadata={"position": -1, "help": "inpA", "argstr": ""}, ), ) ], @@ -97,7 +95,7 @@ def test_shell_cmd_inputs_1_st(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inp1", "argstr": ""}, + metadata={"position": 1, "help": "inp1", "argstr": ""}, ), ) ], @@ -124,14 +122,14 @@ def test_shell_cmd_inputs_2(): "inpA", attr.ib( type=str, - metadata={"position": 2, "help_string": "inpA", "argstr": ""}, + metadata={"position": 2, "help": "inpA", "argstr": ""}, ), ), ( "inpB", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpN", "argstr": ""}, + metadata={"position": 1, "help": "inpN", "argstr": ""}, ), ), ], @@ -150,8 +148,8 @@ def test_shell_cmd_inputs_2a(): my_input_spec = SpecInfo( name="Input", fields=[ - ("inpA", attr.ib(type=str, metadata={"help_string": "inpA", "argstr": ""})), - ("inpB", attr.ib(type=str, metadata={"help_string": "inpB", "argstr": ""})), + ("inpA", attr.ib(type=str, metadata={"help": "inpA", "argstr": ""})), + ("inpB", attr.ib(type=str, metadata={"help": "inpB", "argstr": ""})), ], 
bases=(ShellDef,), ) @@ -176,14 +174,14 @@ def test_shell_cmd_inputs_2_err(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, + metadata={"position": 1, "help": "inpA", "argstr": ""}, ), ), ( "inpB", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, + metadata={"position": 1, "help": "inpB", "argstr": ""}, ), ), ], @@ -209,14 +207,14 @@ def test_shell_cmd_inputs_2_noerr(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, + metadata={"position": 1, "help": "inpA", "argstr": ""}, ), ), ( "inpB", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, + metadata={"position": 1, "help": "inpB", "argstr": ""}, ), ), ], @@ -236,17 +234,17 @@ def test_shell_cmd_inputs_3(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": ""}, + metadata={"position": 1, "help": "inpA", "argstr": ""}, ), ), ( "inpB", attr.ib( type=str, - metadata={"position": -1, "help_string": "inpB", "argstr": ""}, + metadata={"position": -1, "help": "inpB", "argstr": ""}, ), ), - ("inpC", attr.ib(type=str, metadata={"help_string": "inpC", "argstr": ""})), + ("inpC", attr.ib(type=str, metadata={"help": "inpC", "argstr": ""})), ], bases=(ShellDef,), ) @@ -272,7 +270,7 @@ def test_shell_cmd_inputs_argstr_1(): "inpA", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpA", "argstr": "-v"}, + metadata={"position": 1, "help": "inpA", "argstr": "-v"}, ), ) ], @@ -293,7 +291,7 @@ def test_shell_cmd_inputs_argstr_2(): "inpA", attr.ib( type=bool, - metadata={"position": 1, "help_string": "inpA", "argstr": "-v"}, + metadata={"position": 1, "help": "inpA", "argstr": "-v"}, ), ) ], @@ -317,7 +315,7 @@ def test_shell_cmd_inputs_list_1(): "inpA", attr.ib( type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": ""}, + metadata={"position": 2, "help": "inpA", "argstr": ""}, ), ) ], @@ -340,7 +338,7 @@ def test_shell_cmd_inputs_list_2(): "inpA", attr.ib( type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": "-v"}, + metadata={"position": 2, "help": "inpA", "argstr": "-v"}, ), ) ], @@ -362,7 +360,7 @@ def test_shell_cmd_inputs_list_3(): "inpA", attr.ib( type=ty.List[str], - metadata={"position": 2, "help_string": "inpA", "argstr": "-v..."}, + metadata={"position": 2, "help": "inpA", "argstr": "-v..."}, ), ) ], @@ -387,7 +385,7 @@ def test_shell_cmd_inputs_list_sep_1(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "", }, @@ -417,7 +415,7 @@ def test_shell_cmd_inputs_list_sep_2(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "-v", }, @@ -447,7 +445,7 @@ def test_shell_cmd_inputs_list_sep_2a(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "-v {inpA}", }, @@ -477,7 +475,7 @@ def test_shell_cmd_inputs_list_sep_3(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "-v...", }, @@ -507,7 +505,7 @@ def test_shell_cmd_inputs_list_sep_3a(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "-v {inpA}...", }, @@ -537,7 +535,7 @@ def test_shell_cmd_inputs_sep_4(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": 
"-v...", }, @@ -562,7 +560,7 @@ def test_shell_cmd_inputs_sep_4a(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "sep": ",", "argstr": "-v...", }, @@ -587,7 +585,7 @@ def test_shell_cmd_inputs_format_1(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "-v {inpA}", }, ), @@ -611,7 +609,7 @@ def test_shell_cmd_inputs_format_2(): type=MultiInputObj[str], metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "-v {inpA}...", }, ), @@ -639,7 +637,7 @@ def test_shell_cmd_inputs_format_3(): type=float, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "-v {inpA:.5f}", }, ), @@ -663,7 +661,7 @@ def test_shell_cmd_inputs_mandatory_1(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -689,7 +687,7 @@ def test_shell_cmd_inputs_not_given_1(): type=MultiInputObj, metadata={ "argstr": "--arg1", - "help_string": "Command line argument 1", + "help": "Command line argument 1", }, ), ), @@ -699,7 +697,7 @@ def test_shell_cmd_inputs_not_given_1(): type=MultiInputObj, metadata={ "argstr": "--arg2", - "help_string": "Command line argument 2", + "help": "Command line argument 2", }, ), ), @@ -709,7 +707,7 @@ def test_shell_cmd_inputs_not_given_1(): type=File, metadata={ "argstr": "--arg3", - "help_string": "Command line argument 3", + "help": "Command line argument 3", }, ), ), @@ -734,7 +732,7 @@ def test_shell_cmd_inputs_template_1(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -746,7 +744,7 @@ def test_shell_cmd_inputs_template_1(): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_out", }, @@ -775,7 +773,7 @@ def test_shell_cmd_inputs_template_1a(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -786,7 +784,7 @@ def test_shell_cmd_inputs_template_1a(): attr.ib( type=str, metadata={ - "help_string": "outA", + "help": "outA", "output_file_template": "{inpA}_out", }, ), @@ -810,7 +808,7 @@ def test_shell_cmd_inputs_template_2(): "inpB", attr.ib( type=str, - metadata={"position": 1, "help_string": "inpB", "argstr": ""}, + metadata={"position": 1, "help": "inpB", "argstr": ""}, ), ), ( @@ -819,7 +817,7 @@ def test_shell_cmd_inputs_template_2(): type=str, metadata={ "position": 2, - "help_string": "outB", + "help": "outB", "argstr": "-o", "output_file_template": "{inpB}_out", }, @@ -853,7 +851,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -865,7 +863,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): type=str, metadata={ "position": 2, - "help_string": "inpB", + "help": "inpB", "argstr": "", "mandatory": True, }, @@ -876,7 +874,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): attr.ib( type=str, metadata={ - "help_string": "outA", + "help": "outA", "output_file_template": "{inpA}_out", }, ), @@ -886,7 +884,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): attr.ib( type=str, metadata={ - "help_string": "outB", + "help": "outB", "output_file_template": "{inpB}_out", }, ), @@ -897,7 +895,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): type=str, metadata={ "position": -1, - "help_string": "outAB", + "help": "outAB", "argstr": "-o {outA} {outB}", "readonly": True, }, @@ 
-933,7 +931,7 @@ def test_shell_cmd_inputs_template_3a(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -945,7 +943,7 @@ def test_shell_cmd_inputs_template_3a(): type=str, metadata={ "position": 2, - "help_string": "inpB", + "help": "inpB", "argstr": "", "mandatory": True, }, @@ -957,7 +955,7 @@ def test_shell_cmd_inputs_template_3a(): type=str, metadata={ "position": -1, - "help_string": "outAB", + "help": "outAB", "argstr": "-o {outA} {outB}", "readonly": True, }, @@ -968,7 +966,7 @@ def test_shell_cmd_inputs_template_3a(): attr.ib( type=str, metadata={ - "help_string": "outA", + "help": "outA", "output_file_template": "{inpA}_out", }, ), @@ -978,7 +976,7 @@ def test_shell_cmd_inputs_template_3a(): attr.ib( type=str, metadata={ - "help_string": "outB", + "help": "outB", "output_file_template": "{inpB}_out", }, ), @@ -1014,7 +1012,7 @@ def test_shell_cmd_inputs_template_4(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1024,7 +1022,7 @@ def test_shell_cmd_inputs_template_4(): "inpB", attr.ib( type=str, - metadata={"position": 2, "help_string": "inpB", "argstr": ""}, + metadata={"position": 2, "help": "inpB", "argstr": ""}, ), ), ( @@ -1033,7 +1031,7 @@ def test_shell_cmd_inputs_template_4(): type=str, metadata={ "position": -1, - "help_string": "outAB", + "help": "outAB", "argstr": "-o {outA} {outB}", "readonly": True, }, @@ -1044,7 +1042,7 @@ def test_shell_cmd_inputs_template_4(): attr.ib( type=str, metadata={ - "help_string": "outA", + "help": "outA", "output_file_template": "{inpA}_out", }, ), @@ -1054,7 +1052,7 @@ def test_shell_cmd_inputs_template_4(): attr.ib( type=str, metadata={ - "help_string": "outB", + "help": "outB", "output_file_template": "{inpB}_out", }, ), @@ -1080,7 +1078,7 @@ def test_shell_cmd_inputs_template_5_ex(): type=str, metadata={ "position": -1, - "help_string": "outAB", + "help": "outAB", "argstr": "-o", "readonly": True, }, @@ -1111,7 +1109,7 @@ def test_shell_cmd_inputs_template_6(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1123,7 +1121,7 @@ def test_shell_cmd_inputs_template_6(): type=ty.Union[str, bool], metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_out", }, @@ -1171,7 +1169,7 @@ def test_shell_cmd_inputs_template_6a(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1184,7 +1182,7 @@ def test_shell_cmd_inputs_template_6a(): default=False, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_out", }, @@ -1230,7 +1228,7 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1242,7 +1240,7 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "", "output_file_template": "{inpA}_out", }, @@ -1278,7 +1276,7 @@ def test_shell_cmd_inputs_template_7a(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1290,7 +1288,7 @@ def test_shell_cmd_inputs_template_7a(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": 
"outA", "argstr": "", "keep_extension": True, "output_file_template": "{inpA}_out", @@ -1327,7 +1325,7 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1339,7 +1337,7 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "", "keep_extension": False, "output_file_template": "{inpA}_out", @@ -1374,7 +1372,7 @@ def test_shell_cmd_inputs_template_8(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1386,7 +1384,7 @@ def test_shell_cmd_inputs_template_8(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "", "output_file_template": "{inpA}_out.txt", }, @@ -1422,7 +1420,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1434,7 +1432,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): type=int, metadata={ "position": 2, - "help_string": "inp int", + "help": "inp int", "argstr": "-i", "mandatory": True, }, @@ -1446,7 +1444,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): type=str, metadata={ "position": 3, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_{inpInt}_out.txt", }, @@ -1484,7 +1482,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1496,7 +1494,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "inp str", + "help": "inp str", "argstr": "-i", "mandatory": True, }, @@ -1508,7 +1506,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): type=str, metadata={ "position": 3, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_{inpStr}_out.txt", }, @@ -1546,7 +1544,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1558,7 +1556,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): type=File, metadata={ "position": 2, - "help_string": "inp file", + "help": "inp file", "argstr": "-i", "mandatory": True, }, @@ -1570,7 +1568,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): type=str, metadata={ "position": 3, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_{inpFile}_out.txt", }, @@ -1610,7 +1608,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): type=File, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1622,7 +1620,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): type=str, metadata={ "position": 2, - "help_string": "inp str with extension", + "help": "inp str with extension", "argstr": "-i", "mandatory": True, }, @@ -1634,7 +1632,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): type=str, metadata={ "position": 3, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_{inpStr}_out.txt", }, @@ -1670,7 +1668,7 @@ def test_shell_cmd_inputs_template_10(): type=float, metadata={ "position": 1, - 
"help_string": "inpA", + "help": "inpA", "argstr": "{inpA:.1f}", "mandatory": True, }, @@ -1682,7 +1680,7 @@ def test_shell_cmd_inputs_template_10(): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "file_{inpA:.1f}_out", }, @@ -1712,7 +1710,7 @@ def test_shell_cmd_inputs_template_requires_1(): attr.ib( type=str, metadata={ - "help_string": "input file", + "help": "input file", "mandatory": True, "argstr": "", }, @@ -1722,7 +1720,7 @@ def test_shell_cmd_inputs_template_requires_1(): "with_tpl", attr.ib( type=bool, - metadata={"help_string": "enable template"}, + metadata={"help": "enable template"}, ), ), ( @@ -1730,7 +1728,7 @@ def test_shell_cmd_inputs_template_requires_1(): attr.ib( type=str, metadata={ - "help_string": "output file", + "help": "output file", "argstr": "--tpl", "output_file_template": "tpl.{in_file}", "requires": {"with_tpl"}, @@ -1768,7 +1766,7 @@ def template_fun(inputs): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1780,7 +1778,7 @@ def template_fun(inputs): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": template_fun, }, @@ -1816,7 +1814,7 @@ def template_fun(inputs): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1827,7 +1825,7 @@ def template_fun(inputs): attr.ib( type=int, metadata={ - "help_string": "inpB", + "help": "inpB", "mandatory": True, }, ), @@ -1838,7 +1836,7 @@ def template_fun(inputs): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": template_fun, }, @@ -1871,7 +1869,7 @@ def test_shell_cmd_inputs_template_1_st(): type=str, metadata={ "position": 1, - "help_string": "inpA", + "help": "inpA", "argstr": "", "mandatory": True, }, @@ -1883,7 +1881,7 @@ def test_shell_cmd_inputs_template_1_st(): type=str, metadata={ "position": 2, - "help_string": "outA", + "help": "outA", "argstr": "-o", "output_file_template": "{inpA}_out", }, @@ -1920,7 +1918,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=int, metadata={ - "help_string": """ + "help": """ 2/3/4 This option forces the image to be treated as a specified-dimensional image. If not specified, the program tries to infer the dimensionality from @@ -1936,7 +1934,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=File, metadata={ - "help_string": "A scalar image is expected as input for noise correction.", + "help": "A scalar image is expected as input for noise correction.", "argstr": "-i", "mandatory": True, }, @@ -1947,7 +1945,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=str, metadata={ - "help_string": """ + "help": """ Rician/(Gaussian) Employ a Rician or Gaussian noise model. """, @@ -1961,7 +1959,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=str, metadata={ - "help_string": "If a mask image is specified, denoising is only performed in the mask region.", + "help": "If a mask image is specified, denoising is only performed in the mask region.", "argstr": "-x", }, ), @@ -1972,7 +1970,7 @@ def test_shell_cmd_inputs_denoise_image( type=int, default=1, metadata={ - "help_string": """ + "help": """ (1)/2/3/... Running noise correction on large images can be time consuming. To lessen computation time, the input image can be resampled. 
@@ -1989,7 +1987,7 @@ def test_shell_cmd_inputs_denoise_image( type=int, default=1, metadata={ - "help_string": "Patch radius. Default = 1x1x1", + "help": "Patch radius. Default = 1x1x1", "argstr": "-p", }, ), @@ -2000,7 +1998,7 @@ def test_shell_cmd_inputs_denoise_image( type=int, default=2, metadata={ - "help_string": "Search radius. Default = 2x2x2.", + "help": "Search radius. Default = 2x2x2.", "argstr": "-r", }, ), @@ -2010,7 +2008,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=str, metadata={ - "help_string": """ + "help": """ The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, @@ -2024,7 +2022,7 @@ def test_shell_cmd_inputs_denoise_image( type=ty.Union[str, bool], default=False, metadata={ - "help_string": """ + "help": """ The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, @@ -2037,7 +2035,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=str, metadata={ - "help_string": "Combined output", + "help": "Combined output", "argstr": "-o [{correctedImage}, {noiseImage}]", "position": -1, "readonly": True, @@ -2050,7 +2048,7 @@ def test_shell_cmd_inputs_denoise_image( type=bool, default=False, metadata={ - "help_string": "Get Version Information.", + "help": "Get Version Information.", "argstr": "--version", }, ), @@ -2060,7 +2058,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=int, default=0, - metadata={"help_string": "(0)/1. Verbose output. ", "argstr": "-v"}, + metadata={"help": "(0)/1. Verbose output. ", "argstr": "-v"}, ), ), ( @@ -2069,7 +2067,7 @@ def test_shell_cmd_inputs_denoise_image( type=bool, default=False, metadata={ - "help_string": "Print the help menu (short version)", + "help": "Print the help menu (short version)", "argstr": "-h", }, ), @@ -2079,7 +2077,7 @@ def test_shell_cmd_inputs_denoise_image( attr.ib( type=int, metadata={ - "help_string": "Print the help menu.", + "help": "Print the help menu.", "argstr": "--help", }, ), @@ -2170,16 +2168,16 @@ def test_shell_cmd_inputs_denoise_image( class SimpleTaskXor(ShellDef["SimpleTaskXor.Outputs"]): input_1: str = shell.arg( - help_string="help", + help="help", xor=("input_1", "input_2", "input_3"), ) input_2: bool = shell.arg( - help_string="help", + help="help", argstr="--i2", xor=("input_1", "input_2", "input_3"), ) input_3: bool = shell.arg( - help_string="help", + help="help", xor=("input_1", "input_2", "input_3"), ) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 1af55843eb..a1f72d7b43 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -214,7 +214,7 @@ def test_singularity_inputspec_1(plugin, tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) @@ -256,7 +256,7 @@ def test_singularity_inputspec_1a(plugin, tmp_path): attr.ib( type=File, default=filename, - metadata={"position": 1, "argstr": "", "help_string": "input file"}, + metadata={"position": 1, "argstr": "", "help": "input file"}, ), ) ], @@ -300,7 +300,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "input file 1", + "help": "input file 1", }, ), ), @@ -312,7 +312,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): metadata={ "position": 2, "argstr": "", - "help_string": "input file 2", + "help": "input file 2", }, ), ), @@ 
-361,7 +361,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "input file 1", + "help": "input file 1", }, ), ), @@ -372,7 +372,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): metadata={ "position": 2, "argstr": "", - "help_string": "input file 2", + "help": "input file 2", }, ), ), @@ -421,7 +421,7 @@ def test_singularity_inputspec_2a(plugin, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "input file 1", + "help": "input file 1", }, ), ), @@ -432,7 +432,7 @@ def test_singularity_inputspec_2a(plugin, tmp_path): metadata={ "position": 2, "argstr": "", - "help_string": "input file 2", + "help": "input file 2", }, ), ), @@ -477,7 +477,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "orig file", + "help": "orig file", "mandatory": True, "copyfile": True, }, @@ -489,7 +489,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): type=str, metadata={ "output_file_template": "{orig_file}", - "help_string": "output file", + "help": "output file", }, ), ), @@ -545,7 +545,7 @@ def test_singularity_inputspec_state_1(tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) @@ -595,7 +595,7 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) @@ -638,7 +638,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) @@ -694,7 +694,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) @@ -751,7 +751,7 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): "mandatory": True, "position": 1, "argstr": "", - "help_string": "input file", + "help": "input file", }, ), ) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 104992f387..896398d060 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -591,7 +591,7 @@ def testfunc(a): my_input_spec = SpecInfo( name="Input", - fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], + fields=[("a", attr.ib(type=float, metadata={"help": "input a"}))], bases=(FunctionDef,), ) @@ -610,7 +610,7 @@ def testfunc(a): my_input_spec = SpecInfo( name="Input", - fields=[("a", attr.ib(type=int, metadata={"help_string": "input a"}))], + fields=[("a", attr.ib(type=int, metadata={"help": "input a"}))], bases=(FunctionDef,), ) with pytest.raises(TypeError): @@ -631,7 +631,7 @@ def testfunc(a): fields=[ ( "a", - attr.ib(type=float, metadata={"position": 1, "help_string": "input a"}), + attr.ib(type=float, metadata={"position": 1, "help": "input a"}), ) ], bases=(FunctionDef,), @@ -666,7 +666,7 @@ def testfunc(a: int): my_input_spec = SpecInfo( name="Input", - fields=[("a", attr.ib(type=float, metadata={"help_string": "input a"}))], + fields=[("a", attr.ib(type=float, metadata={"help": "input a"}))], bases=(FunctionDef,), ) @@ -686,7 +686,7 @@ def testfunc(a: int): my_input_spec = SpecInfo( name="Input", - fields=[("a", float, {"help_string": "input a"})], + fields=[("a", float, {"help": "input a"})], bases=(FunctionDef,), ) @@ -710,7 +710,7 @@ def testfunc(a): "a", attr.ib( type=int, 
- metadata={"help_string": "input a", "allowed_values": [0, 1, 2]}, + metadata={"help": "input a", "allowed_values": [0, 1, 2]}, ), ) ], @@ -737,7 +737,7 @@ def testfunc(a): "a", attr.ib( type=int, - metadata={"help_string": "input a", "allowed_values": [0, 1, 2]}, + metadata={"help": "input a", "allowed_values": [0, 1, 2]}, ), ) ], @@ -762,15 +762,11 @@ def testfunc(a, b=1): fields=[ ( "a", - attr.ib( - type=int, metadata={"help_string": "input a", "mandatory": True} - ), + attr.ib(type=int, metadata={"help": "input a", "mandatory": True}), ), ( "b", - attr.ib( - type=int, metadata={"help_string": "input b", "mandatory": True} - ), + attr.ib(type=int, metadata={"help": "input b", "mandatory": True}), ), ], bases=(FunctionDef,), @@ -795,11 +791,9 @@ def testfunc(a, b=1): fields=[ ( "a", - attr.ib( - type=int, metadata={"help_string": "input a", "mandatory": True} - ), + attr.ib(type=int, metadata={"help": "input a", "mandatory": True}), ), - ("b", attr.ib(type=int, default=10, metadata={"help_string": "input b"})), + ("b", attr.ib(type=int, default=10, metadata={"help": "input b"})), ], bases=(FunctionDef,), ) @@ -820,9 +814,7 @@ def testfunc(a): my_input_spec = SpecInfo( name="Input", - fields=[ - ("a", attr.ib(type=MultiInputObj, metadata={"help_string": "input a"})) - ], + fields=[("a", attr.ib(type=MultiInputObj, metadata={"help": "input a"}))], bases=(FunctionDef,), ) @@ -841,7 +833,7 @@ def testfunc(a): my_output_spec = SpecInfo( name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], + fields=[("out1", attr.ib(type=float, metadata={"help": "output"}))], bases=(BaseDef,), ) @@ -861,7 +853,7 @@ def testfunc(a): my_output_spec = SpecInfo( name="Output", - fields=[("out1", attr.ib(type=int, metadata={"help_string": "output"}))], + fields=[("out1", attr.ib(type=int, metadata={"help": "output"}))], bases=(BaseDef,), ) @@ -881,7 +873,7 @@ def testfunc(a) -> int: my_output_spec = SpecInfo( name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help_string": "output"}))], + fields=[("out1", attr.ib(type=float, metadata={"help": "output"}))], bases=(BaseDef,), ) @@ -902,7 +894,7 @@ def testfunc(a) -> int: my_output_spec = SpecInfo( name="Output", - fields=[("out1", float, {"help_string": "output"})], + fields=[("out1", float, {"help": "output"})], bases=(BaseDef,), ) @@ -925,7 +917,7 @@ def testfunc(a, b): fields=[ ( "out_list", - attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), + attr.ib(type=MultiOutputObj, metadata={"help": "output"}), ) ], bases=(BaseDef,), @@ -950,7 +942,7 @@ def testfunc(a): fields=[ ( "out_1el", - attr.ib(type=MultiOutputObj, metadata={"help_string": "output"}), + attr.ib(type=MultiOutputObj, metadata={"help": "output"}), ) ], bases=(BaseDef,), @@ -1117,7 +1109,7 @@ def test_audit_shellcommandtask_file(tmp_path): metadata={ "position": 1, "argstr": "", - "help_string": "text", + "help": "text", "mandatory": True, }, ), @@ -1129,7 +1121,7 @@ def test_audit_shellcommandtask_file(tmp_path): metadata={ "position": 2, "argstr": "", - "help_string": "text", + "help": "text", "mandatory": True, }, ), diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 8bf9932923..411435234d 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -35,14 +35,14 @@ class GenericShellTask(specs.ShellDef["GenericShellTask.Outputs"]): """class with customized input and executables""" in_file: File = shell.arg( - help_string="the input file", + help="the input file", argstr="", 
copy_mode="copy", ) class Outputs(specs.ShellOutputs): out: File = shell.outarg( - help_string="output file name", + help="output file name", argstr="", position=-1, path_template="{in_file}", @@ -61,7 +61,7 @@ class SpecificShellTask(specs.ShellDef["SpecificShellTask.Outputs"]): executable = "echo" in_file: MyFormatX = shell.arg( - help_string="the input file", + help="the input file", argstr="", copy_mode="copy", sep=" ", @@ -69,7 +69,7 @@ class SpecificShellTask(specs.ShellDef["SpecificShellTask.Outputs"]): class Outputs(specs.ShellOutputs): out: MyFormatX = shell.outarg( - help_string="output file name", + help="output file name", argstr="", position=-1, path_template="{in_file}", # Pass through un-altered @@ -84,7 +84,7 @@ def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: class OtherSpecificShellTask(ShellTask): in_file: MyOtherFormatX = shell.arg( - help_string="the input file", + help="the input file", argstr="", copy_mode="copy", sep=" ", @@ -92,7 +92,7 @@ class OtherSpecificShellTask(ShellTask): class Outputs(specs.ShellOutputs): out: MyOtherFormatX = shell.outarg( - help_string="output file name", + help="output file name", argstr="", position=-1, path_template="{in_file}", # Pass through un-altered From ac7ab7f60dccfde4401d301c5268462bbdc7487c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 5 Jan 2025 00:15:48 +1100 Subject: [PATCH 129/342] renamed make_task_spec to make_task_def --- .../source/tutorial/advanced-execution.ipynb | 4 ++ new-docs/source/tutorial/canonical-form.ipynb | 2 +- .../source/tutorial/getting-started.ipynb | 40 +++++++++++++++---- new-docs/source/tutorial/workflow.ipynb | 2 +- pydra/design/base.py | 6 ++- pydra/design/boutiques.py | 4 +- pydra/design/python.py | 4 +- pydra/design/shell.py | 4 +- pydra/design/workflow.py | 4 +- pydra/engine/submitter.py | 7 ++++ pydra/engine/tests/test_dockertask.py | 10 ----- pydra/engine/tests/test_environments.py | 1 - pydra/utils/typing.py | 2 + 13 files changed, 59 insertions(+), 31 deletions(-) diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index 9b69208865..e633d0970c 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -64,6 +64,10 @@ "from fileformats.medimage import Nifti\n", "from pydra.engine.submitter import Submitter\n", "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "import nest_asyncio\n", + "\n", + "# Allow running async code in Jupyter notebooks\n", + "nest_asyncio.apply()\n", "\n", "# Make directory filled with nifti files\n", "test_dir = Path(tempfile.mkdtemp())\n", diff --git a/new-docs/source/tutorial/canonical-form.ipynb b/new-docs/source/tutorial/canonical-form.ipynb index 242e893510..4ff8b93ec8 100644 --- a/new-docs/source/tutorial/canonical-form.ipynb +++ b/new-docs/source/tutorial/canonical-form.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Canonical (dataclass) task form\n", + "# Canonical task form\n", "\n", "Under the hood, all Python, shell and workflow task definitions generated by the\n", "`pydra.design.*.define` decorators/functions are translated to\n", diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index 0044c4783d..d97211bd85 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -38,6 +38,10 @@ "from pathlib import Path\n", "from tempfile import mkdtemp\n", "import 
json\n", + "import nest_asyncio\n", + "\n", + "# Allow running async code in Jupyter notebooks\n", + "nest_asyncio.apply()\n", "\n", "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", "\n", @@ -57,9 +61,17 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev128+g1e817743.d20250104\n" + ] + } + ], "source": [ "# Import the task definition\n", "from pydra.tasks.common import LoadJson\n", @@ -83,13 +95,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from pydra.engine.submitter import Submitter\n", "\n", - "with Submitter(plugin='cf', n_procs=1) as submitter:\n", + "with Submitter(plugin='serial', n_procs=1) as submitter:\n", " result = submitter(load_json)" ] }, @@ -119,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -142,14 +154,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'MrGrid' object has no attribute 'split'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvoxel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m(\u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Run the task to resample all NIfTI files\u001b[39;00m\n\u001b[1;32m 7\u001b[0m outputs \u001b[38;5;241m=\u001b[39m mrgrid()\n", + "\u001b[0;31mAttributeError\u001b[0m: 'MrGrid' object has no attribute 'split'" + ] + } + ], "source": [ "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", - "mrgrid = MrGrid(voxel=0.5).split(input=nifti_dir.iterdir())\n", + "mrgrid = MrGrid(voxel=(0.5,0.5,0.5)).split(input=nifti_dir.iterdir())\n", "\n", "# Run the task to resample all NIfTI files\n", "outputs = mrgrid()\n", diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 32c1192053..68dea430be 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -414,7 +414,7 
@@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "wf12", "language": "python", "name": "python3" }, diff --git a/pydra/design/base.py b/pydra/design/base.py index 5524cfe93b..9bdfb1f832 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -34,7 +34,7 @@ "Arg", "Out", "ensure_field_objects", - "make_task_spec", + "make_task_def", ] @@ -349,7 +349,7 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: return inputs, outputs -def make_task_spec( +def make_task_def( spec_type: type["TaskDef"], out_type: type["TaskOutputs"], task_type: type["Task"], @@ -724,6 +724,8 @@ def allowed_values_validator(_, attribute, value): allowed = attribute.metadata[PYDRA_ATTR_METADATA].allowed_values if value is attrs.NOTHING or is_lazy(value): pass + elif value is None and is_optional(attribute.type): + pass elif value not in allowed: raise ValueError( f"value of {attribute.name} has to be from {allowed}, but {value} provided" diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 20bcc3efd0..8931877e44 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -7,7 +7,7 @@ from fileformats.generic import File from pydra.engine.specs import ShellDef from pydra.engine.task import BoshTask -from .base import make_task_spec +from .base import make_task_def from . import shell @@ -113,7 +113,7 @@ def define( outputs = _prepare_output_spec( bosh_spec, input_keys, names_subset=output_spec_names ) - return make_task_spec( + return make_task_def( spec_type=ShellDef, task_type=BoshTask, out_type=out, diff --git a/pydra/design/python.py b/pydra/design/python.py index 128c583fe8..8464c1c02b 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -6,7 +6,7 @@ Arg, Out, ensure_field_objects, - make_task_spec, + make_task_def, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, @@ -159,7 +159,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: name="function", type=ty.Callable, default=function ) - interface = make_task_spec( + interface = make_task_def( PythonDef, PythonOutputs, PythonTask, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 9c70882e43..3baad9c712 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -19,7 +19,7 @@ check_explicit_fields_are_none, extract_fields_from_class, ensure_field_objects, - make_task_spec, + make_task_def, EMPTY, ) from pydra.utils.typing import is_fileset_or_union, MultiInputObj @@ -373,7 +373,7 @@ def make( if inpt.position is None: inpt.position = position_stack.pop(0) - interface = make_task_spec( + interface = make_task_def( ShellDef, ShellOutputs, ShellTask, diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index fe700c5cb0..53f76eba4a 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -6,7 +6,7 @@ Arg, Out, ensure_field_objects, - make_task_spec, + make_task_def, parse_doc_string, extract_function_inputs_and_outputs, check_explicit_fields_are_none, @@ -171,7 +171,7 @@ def make(wrapped: ty.Callable | type) -> TaskDef: for inpt_name in lazy: parsed_inputs[inpt_name].lazy = True - interface = make_task_spec( + interface = make_task_def( WorkflowDef, WorkflowOutputs, WorkflowTask, diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 0b7f2c9841..6f5a7d0290 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -50,8 +50,15 @@ def __init__(self, plugin: ty.Union[str, ty.Type[Worker]] = "cf", **kwargs): def 
__call__(self, runnable, cache_locations=None, rerun=False, environment=None): """Submitter run function.""" + from pydra.engine.core import TaskDef + if cache_locations is not None: runnable.cache_locations = cache_locations + if isinstance(runnable, TaskDef): + runnable = runnable.Task( + runnable, + cache_locations=cache_locations, + ) self.loop.run_until_complete( self.submit_from_call(runnable, rerun, environment) ) diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index d6893fac5d..729307881d 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -140,7 +140,6 @@ def test_docker_inputspec_1(tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -258,7 +257,6 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): shell.arg( name="file2", type=File, - mandatory=True, position=2, argstr="", help="input file 2", @@ -304,7 +302,6 @@ def test_docker_inputspec_2a(plugin, tmp_path): shell.arg( name="file2", type=File, - mandatory=True, position=2, argstr="", help="input file 2", @@ -335,7 +332,6 @@ def test_docker_inputspec_3(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -372,7 +368,6 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): shell.arg( name="orig_file", type=File, - mandatory=True, position=1, argstr="", help="orig file", @@ -423,7 +418,6 @@ def test_docker_inputspec_state_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -460,7 +454,6 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -490,7 +483,6 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -533,7 +525,6 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -578,7 +569,6 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index d306381c6a..5d2aea0d01 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -179,7 +179,6 @@ def create_shelly_inputfile(tempdir, filename, name, executable): type=File, position=1, help="files", - mandatory=True, argstr="", ) ] diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index e48bdda753..01d9de784f 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1042,6 +1042,8 @@ def is_fileset_or_union(type_: type) -> bool: """Check if the type is a FileSet or a Union containing a FileSet""" if is_union(type_): return any(is_fileset_or_union(t) for t in ty.get_args(type_)) + elif not inspect.isclass(type_): + return False return issubclass(type_, core.FileSet) From 7c47eeff44aad30fb2b3cc2a69716d162d57ed57 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 7 Jan 2025 10:36:37 +1100 Subject: [PATCH 130/342] touch up docs --- .../source/explanation/conditional-lazy.rst | 4 ++-- .../source/explanation/hashing-caching.rst | 4 ++++ new-docs/source/explanation/typing.rst | 4 ++-- new-docs/source/index.rst | 19 +++++++++++-------- 4 files changed, 19 
insertions(+), 12 deletions(-) create mode 100644 new-docs/source/explanation/hashing-caching.rst diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst index 0c30be1d1c..fd81077b42 100644 --- a/new-docs/source/explanation/conditional-lazy.rst +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -1,4 +1,4 @@ -Conditionals and lazy fields -============================ +Dynamic workflow construction +============================= Work in progress... diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst new file mode 100644 index 0000000000..4f7e561c5d --- /dev/null +++ b/new-docs/source/explanation/hashing-caching.rst @@ -0,0 +1,4 @@ +Hashing and caching +=================== + +Work in progress... diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst index a49d304b09..e682e1f850 100644 --- a/new-docs/source/explanation/typing.rst +++ b/new-docs/source/explanation/typing.rst @@ -1,4 +1,4 @@ -Typing, file-formats and hashing -================================ +Typing and file-formats +======================= Work in progress... diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index a69b8a13fc..edb9e609c0 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -10,16 +10,18 @@ Pydra helps build reproducible, scalable, reusable, and fully automated, provena tracked scientific workflows that combine Python functions and shell commands. The power of Pydra lies in ease of workflow creation and execution for complex -multiparameter map-reduce operations, and the use of global cache. +multiparameter map-reduce operations, and the use of global cache (see +:ref:`Design philosophy` for the rationale behind its design). Pydra's key features are: -- Modular execution backends (see `Advanced execution <./tutorial/advanced-execution.html>`__) -- Map-reduce like semantics (see `Splitting and combining <./explanation/splitting-combining.html>`__) -- Global cache support to reduce recomputation (see `Hashing and caching <./explanation/hashing-caching.html>`__) -- Support for execution of Tasks in containerized environments (see `Environments <./explanation/environments.html>`__) -- Strong type-checking and type-hinting support (see `Typing <./explanation/typing.html>`__) -See :ref:`Design philosophy` for more an explanation of the design of Pydra. +* Map-reduce like semantics (see :ref:`Splitting and combining`) +* Dynamic workflow construction using Python code (see :ref:`Dynamic workflow construction`) +* Modular execution backends (see `Advanced execution <./tutorial/advanced-execution.html>`__) +* Global cache support to reduce recomputation (see :ref:`Hashing and caching`) +* Support for the execution of tasks in containerized environments (see :ref:`Containers and environments`) +* Strong type-checking and type-hinting support (see :ref:`Typing and file-formats`) + Installation ------------ @@ -113,11 +115,12 @@ Indices and tables .. 
toctree:: :maxdepth: 2 - :caption: In-depth + :caption: Explanation :hidden: explanation/design-approach explanation/splitting-combining + explanation/hashing-caching explanation/typing explanation/conditional-lazy explanation/environments From 83cbbb622ffddce6155fb7f2582c565a8bdd0b9d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 7 Jan 2025 14:24:30 +1100 Subject: [PATCH 131/342] touching up docs --- .gitignore | 1 + environment.yml | 25 ++++++ .../source/explanation/conditional-lazy.rst | 4 +- new-docs/source/explanation/typing.rst | 31 +++++++ new-docs/source/index.rst | 80 +++++++++++++------ .../source/tutorial/advanced-execution.ipynb | 11 ++- .../source/tutorial/getting-started.ipynb | 78 +++++++++++++++--- new-docs/source/tutorial/python.ipynb | 2 +- new-docs/source/tutorial/workflow.ipynb | 10 ++- notebooks/examples | 1 + notebooks/tutorial | 1 + pydra/design/__init__.py | 6 -- pyproject.toml | 21 ++++- 13 files changed, 219 insertions(+), 52 deletions(-) create mode 100644 environment.yml create mode 120000 notebooks/examples create mode 120000 notebooks/tutorial delete mode 100644 pydra/design/__init__.py diff --git a/.gitignore b/.gitignore index fe1935c4bb..293fc4f8a8 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ cov.xml *.venv .DS_Store +.ipynb_checkpoints # This can be generated in-tree. We never want to commit it. pydra/_version.py diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000..f2712b1ae6 --- /dev/null +++ b/environment.yml @@ -0,0 +1,25 @@ +name: pydra-tutorial +channels: + - conda-forge + - defaults +dependencies: + - datalad + - pip + - pip: + - pydra==0.23.0a0 + - jupyter + - jupyter_contrib_nbextensions + - jupytext + - jupyterlab + - matplotlib + - nbformat + - nbval + - nest_asyncio + - psutil + - sh + - pytest + - numpy + - pandas + - scipy + - nibabel + - nilearn diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst index fd81077b42..22c1b148bd 100644 --- a/new-docs/source/explanation/conditional-lazy.rst +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -1,4 +1,4 @@ -Dynamic workflow construction -============================= +Dynamic construction +==================== Work in progress... diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst index e682e1f850..685346f373 100644 --- a/new-docs/source/explanation/typing.rst +++ b/new-docs/source/explanation/typing.rst @@ -2,3 +2,34 @@ Typing and file-formats ======================= Work in progress... + +Pydra implements strong(-ish) type-checking at workflow construction time, which can +include file types. + +Coercion +-------- + + +File formats +------------ + +The FileFormats_ package provides a way to specify the format of a file, or set of +files, by the extensible collection of file format classes. These classes can be +used to specify the format of a file in a task input or output, and can be used +to validate the format of a file at runtime. 
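A minimal sketch of how such a format class is used in a task definition, assuming the ``shell.arg`` helper and ``fileformats.generic.File`` import that appear elsewhere in this series (the field names here are illustrative only)::

    from fileformats.generic import File
    from pydra.design import shell

    input_fields = [
        shell.arg(
            name="in_file",
            type=File,      # any existing file; use a more specific format class to be stricter
            position=1,
            argstr="",
            help="input file",
        )
    ]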
+ +It is important to use a FileFormats_ type, when specifying fields that represent +a path to an existing file-system object (In most cases, it is sufficient to use the generic ``fileformats.generic.File``, +``fileformats.generic.File``, class + +Superclass auto-casting +----------------------- + +Not wanting the typing to get in the way by being unnecessarily strict, +upstream fields that are typed as super classes (or as ``typing.Any`` by default) +of the task input they are connected to will be automatically cast to the subclass +when the task is run. This allows workflows and tasks to be easily connected together +regardless of how specific typing is defined in the task definition. + + +.. _FileFormats: https://arcanaframework.github.io/fileformats diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index edb9e609c0..20f2883f34 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -4,23 +4,24 @@ Pydra ===== Pydra is a lightweight, Python 3.11+ dataflow engine for computational graph construction, -manipulation, and distributed execution. Designed as a successor to created for Nipype_, +manipulation, and distributed execution. Designed as a successor to Nipype_, Pydra is a general-purpose engine that supports analytics in any scientific domain. -Pydra helps build reproducible, scalable, reusable, and fully automated, provenance -tracked scientific workflows that combine Python functions and shell commands. +Pydra helps build reproducible, scalable, reusable scientific workflows +that combine Python functions and shell commands. -The power of Pydra lies in ease of workflow creation and execution for complex -multiparameter map-reduce operations, and the use of global cache (see +The power of Pydra lies in ease of constructing workflows, containing complex +multiparameter map-reduce operations, in Python code and the use of a global cache (see :ref:`Design philosophy` for the rationale behind its design). Pydra's key features are: +* Combine diverse tasks, implemented in `Python functions <./tutorial/python.html>`__ or stand-alone `shell commands <./tutorial/shell.html>`__, into coherent `workflows <./tutorial/workflow.html>`__ * Map-reduce like semantics (see :ref:`Splitting and combining`) -* Dynamic workflow construction using Python code (see :ref:`Dynamic workflow construction`) -* Modular execution backends (see `Advanced execution <./tutorial/advanced-execution.html>`__) -* Global cache support to reduce recomputation (see :ref:`Hashing and caching`) +* Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) +* Modular execution systems for varied deployment on cloud, HPC, etc... (see `Execution options <./tutorial/advanced-execution.html>`__) * Support for the execution of tasks in containerized environments (see :ref:`Containers and environments`) -* Strong type-checking and type-hinting support (see :ref:`Typing and file-formats`) +* Global caching to reduce recomputation (see :ref:`Hashing and caching`) +* Support for strong type-checking, including file types, at workflow construction time (see :ref:`Typing and file-formats`) Installation @@ -45,21 +46,44 @@ install these packages as well: Of course, if you use Pydra to execute commands within toolkits, you will need to either have those commands installed on the execution machine, or use containers -environments (see [Environments](../explanation/environments.html)) to run them. +environments (see `Environments <../explanation/environments.html>`__) to run them. 
-Tutorials ---------- +Tutorials and notebooks +----------------------- + +The following tutorials provide a step-by-step guide to using Pydra. +They can be read in any order, but it is recommended to start with :ref:`Getting started`. +The tutorials are implemented as Jupyter notebooks, which can be downloaded and run locally +or run online using the |Binder| within each tutorial. + +If you decide to download the notebooks and run locally, be sure to install the necessary +dependencies with ``pip install -e /path/to/your/pydra[tutorial]``. + + +Execution +~~~~~~~~~ + +Learn how to execute existing tasks (including workflows) on different systems * :ref:`Getting started` -* :ref:`Advanced execution` +* :ref:`Execution options` + +Design +~~~~~~ + +Learn how to design your own tasks + * :ref:`Python-tasks` * :ref:`Shell-tasks` * :ref:`Workflows` -* :ref:`Canonical (dataclass) task form` +* :ref:`Canonical task form` Examples --------- +~~~~~~~~ + +The following comprehensive examples demonstrate how to use Pydra to build and execute +complex workflows * :ref:`T1w MRI preprocessing` * :ref:`One-level GLM` @@ -68,18 +92,24 @@ Examples How-to Guides ------------- +The following guides provide step-by-step instructions on how to + * :ref:`Create a task package` * :ref:`Port interfaces from Nipype` -Indices and tables ------------------- +Reference +--------- + +See the full reference documentation for Pydra +* :ref:`API` * :ref:`genindex` * :ref:`modindex` + .. toctree:: :maxdepth: 2 - :caption: Task execution + :caption: Tutorials: Execution :hidden: tutorial/getting-started @@ -87,7 +117,7 @@ Indices and tables .. toctree:: :maxdepth: 2 - :caption: Design + :caption: Tutorials: Design :hidden: tutorial/python @@ -118,13 +148,12 @@ Indices and tables :caption: Explanation :hidden: - explanation/design-approach explanation/splitting-combining - explanation/hashing-caching - explanation/typing explanation/conditional-lazy explanation/environments - + explanation/hashing-caching + explanation/typing + explanation/design-approach .. toctree:: :maxdepth: 2 @@ -132,10 +161,13 @@ Indices and tables :hidden: reference/api + genindex + modindex .. _FSL: https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/FSL .. _ANTs: http://stnava.github.io/ANTs/ .. _AFNI: https://afni.nimh.nih.gov/ .. _niworkflows: https://niworkflows.readthedocs.io/en/latest/ .. _Nipype: https://nipype.readthedocs.io/en/latest/ -.. _ +.. |Binder| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/nipype/pydra/develop diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/advanced-execution.ipynb index e633d0970c..f31e66c6d1 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/advanced-execution.ipynb @@ -4,7 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Advanced execution" + "# Execution options\n", + "\n", + "One of the key design features of Pydra is the separation between the parameterisation of\n", + "the task to be executed, and the parameresiation of where and how the task should be\n", + "executed (e.g. on the cloud, on a HPC cluster, ...). 
This tutorial steps you through\n", + "some of the available options for executing a task.\n", + "\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" ] }, { @@ -27,7 +34,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Caching\n", + "## Cache locations\n", "\n", "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", "task and the operation to be performed. This hash is used to name the output directory for\n", diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/getting-started.ipynb index d97211bd85..803509f30b 100644 --- a/new-docs/source/tutorial/getting-started.ipynb +++ b/new-docs/source/tutorial/getting-started.ipynb @@ -97,12 +97,35 @@ "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "not enough values to unpack (expected 3, got 2)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter() \u001b[38;5;28;01mas\u001b[39;00m submitter:\n\u001b[0;32m----> 4\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msubmitter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mload_json\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(result)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:62\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, runnable, cache_locations, rerun, environment)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(runnable, TaskDef):\n\u001b[1;32m 58\u001b[0m runnable \u001b[38;5;241m=\u001b[39m runnable\u001b[38;5;241m.\u001b[39mTask(\n\u001b[1;32m 59\u001b[0m runnable,\n\u001b[1;32m 60\u001b[0m cache_locations\u001b[38;5;241m=\u001b[39mcache_locations,\n\u001b[1;32m 61\u001b[0m )\n\u001b[0;32m---> 62\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmit_from_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrunnable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menvironment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m PersistentCache()\u001b[38;5;241m.\u001b[39mclean_up()\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m runnable\u001b[38;5;241m.\u001b[39mresult()\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:316\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 314\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39msend(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 316\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_must_cancel:\n\u001b[1;32m 319\u001b[0m \u001b[38;5;66;03m# Task is cancelled right before coro stops.\u001b[39;00m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:98\u001b[0m, in \u001b[0;36mSubmitter.submit_from_call\u001b[0;34m(self, runnable, rerun, environment)\u001b[0m\n\u001b[1;32m 95\u001b[0m runnable\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# 2\u001b[39;00m\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexpand_runnable(runnable, wait\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, rerun\u001b[38;5;241m=\u001b[39mrerun) \u001b[38;5;66;03m# TODO\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:143\u001b[0m, in \u001b[0;36mSubmitter.expand_runnable\u001b[0;34m(self, runnable, wait, rerun)\u001b[0m\n\u001b[1;32m 138\u001b[0m 
futures\u001b[38;5;241m.\u001b[39madd(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun_el((task_pkl, runnable), rerun\u001b[38;5;241m=\u001b[39mrerun))\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m wait \u001b[38;5;129;01mand\u001b[39;00m futures:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;66;03m# if wait is True, we are at the end of the graph / state expansion.\u001b[39;00m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Once the remaining jobs end, we will exit `submit_from_call`\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mfutures)\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# pass along futures to be awaited independently\u001b[39;00m\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:385\u001b[0m, in \u001b[0;36mTask.__wakeup\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__wakeup\u001b[39m(\u001b[38;5;28mself\u001b[39m, future):\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 385\u001b[0m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 387\u001b[0m \u001b[38;5;66;03m# This may also be a cancellation.\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__step(exc)\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 316\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:185\u001b[0m, in \u001b[0;36mConcurrentFuturesWorker.exec_as_coro\u001b[0;34m(self, runnable, rerun, environment)\u001b[0m\n\u001b[1;32m 181\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_in_executor(\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool, runnable\u001b[38;5;241m.\u001b[39m_run, rerun, environment\n\u001b[1;32m 183\u001b[0m )\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# it could be tuple that includes pickle files with tasks and inputs\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m ind, task_main_pkl, task_orig 
\u001b[38;5;241m=\u001b[39m runnable\n\u001b[1;32m 186\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_in_executor(\n\u001b[1;32m 187\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool, load_and_run, task_main_pkl, ind, rerun, environment\n\u001b[1;32m 188\u001b[0m )\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", + "\u001b[0;31mValueError\u001b[0m: not enough values to unpack (expected 3, got 2)" + ] + } + ], "source": [ "from pydra.engine.submitter import Submitter\n", "\n", - "with Submitter(plugin='serial', n_procs=1) as submitter:\n", - " result = submitter(load_json)" + "with Submitter() as submitter:\n", + " result = submitter(load_json)\n", + "\n", + "print(result)" ] }, { @@ -131,9 +154,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "FileExistsError", + "evalue": "[Errno 17] File exists: '/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnaqc3ee3/nifti'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileExistsError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 3\u001b[0m nifti_dir \u001b[38;5;241m=\u001b[39m test_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnifti\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[43mnifti_dir\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmkdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m10\u001b[39m):\n\u001b[1;32m 7\u001b[0m Nifti\u001b[38;5;241m.\u001b[39msample(nifti_dir, seed\u001b[38;5;241m=\u001b[39mi) \u001b[38;5;66;03m# Create a dummy NIfTI file in the dest. 
directory\u001b[39;00m\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/pathlib.py:1311\u001b[0m, in \u001b[0;36mPath.mkdir\u001b[0;34m(self, mode, parents, exist_ok)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1308\u001b[0m \u001b[38;5;124;03mCreate a new directory at this given path.\u001b[39;00m\n\u001b[1;32m 1309\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1311\u001b[0m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmkdir\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1312\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 1313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m parents \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparent \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mself\u001b[39m:\n", + "\u001b[0;31mFileExistsError\u001b[0m: [Errno 17] File exists: '/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnaqc3ee3/nifti'" + ] + } + ], "source": [ "from fileformats.medimage import Nifti\n", "\n", @@ -141,7 +177,7 @@ "nifti_dir.mkdir()\n", "\n", "for i in range(10):\n", - " Nifti.sample(nifti_dir, seed=i)" + " Nifti.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory" ] }, { @@ -149,7 +185,7 @@ "metadata": {}, "source": [ "Then we can by importing the `MrGrid` shell-command task from the `pydra-mrtrix3` package\n", - "and then splitting over the list of files in the directory" + "and run it over every NIfTI file in the directory using the `TaskDef.split()` method" ] }, { @@ -173,6 +209,7 @@ "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", "mrgrid = MrGrid(voxel=(0.5,0.5,0.5)).split(input=nifti_dir.iterdir())\n", "\n", "# Run the task to resample all NIfTI files\n", @@ -186,11 +223,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "It is also possible to iterate over inputs in pairs, if for example you wanted to use\n", + "It is also possible to iterate over inputs in pairs/n-tuples. For example, if you wanted to use\n", "different voxel sizes for different images, both the list of images and the voxel sizes\n", "are passed to the `split()` method and their combination is specified by a tuple \"splitter\"\n", + "\n", + "\n", + "Note that it is important to use a tuple not a list for the splitter definition in this\n", + "case, because a list splitter is interpreted as the split over each combination of inputs\n", "(see [Splitting and combining](../explanation/splitting-combining.html) for more details\n", - "on splitters)" + "on splitters)." 
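To make the contrast concrete, the following is a minimal, illustrative sketch (not part of the patched notebook) of the two splitter forms, assuming the `nifti_dir` populated in the preceding cells:

```python
from pydra.tasks.mrtrix3.v3_0 import MrGrid

images = list(nifti_dir.iterdir())               # the ten sample NIfTI files
voxels = [(0.5, 0.5, 0.5), (1.0, 1.0, 1.0)] * 5  # one voxel size per image

# Tuple splitter: inputs are paired element-wise, giving len(images) tasks,
# with images[i] resampled to voxels[i]
paired = MrGrid().split(("input", "voxel"), input=images, voxel=voxels)

# List splitter: inputs are combined, giving len(images) * len(voxels) tasks,
# one for every (image, voxel) combination
combined = MrGrid().split(["input", "voxel"], input=images, voxel=voxels)
```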
] }, { @@ -199,14 +240,25 @@ "metadata": {}, "outputs": [], "source": [ - "# Define a list of voxel sizes to resample the NIfTI files to, must be the same length\n", - "# as the number of NIfTI files\n", - "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", + "\n", "\n", "mrgrid_varying_vox_sizes = MrGrid().split(\n", " (\"input\", \"voxel\"),\n", " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", + " # Define a list of voxel sizes to resample the NIfTI files to,\n", + " # the list must be the same length as the list of NIfTI files\n", + " voxel=[\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 1.0),\n", + " (0.5, 0.5, 0.5),\n", + " (0.75, 0.75, 0.75),\n", + " (0.5, 0.5, 0.5),\n", + " (0.5, 0.5, 0.5),\n", + " (1.0, 1.0, 1.0),\n", + " (1.25, 1.25, 1.25),\n", + " (1.25, 1.25, 1.25),\n", + " ],\n", ")\n", "\n", "print(mrgrid_varying_vox_sizes().output)" diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/python.ipynb index 140500343d..c8cf726689 100644 --- a/new-docs/source/tutorial/python.ipynb +++ b/new-docs/source/tutorial/python.ipynb @@ -72,7 +72,7 @@ "metadata": {}, "source": [ "By default, the name of the output field for a function with only one output is `out`. To\n", - "name this something else, in the case where there are multiple output fields, the `outputs`\n", + "name this something else, or in the case where there are multiple output fields, the `outputs`\n", "argument can be provided to `python.define`\n" ] }, diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/workflow.ipynb index 68dea430be..bbff37bf8d 100644 --- a/new-docs/source/tutorial/workflow.ipynb +++ b/new-docs/source/tutorial/workflow.ipynb @@ -65,9 +65,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the workflow. The fields of the outptus object can be referenced as inputs to downstream workflow nodes. Note that these fields are just placeholders for the values that will be returned and can't be used in conditional statements during workflow construction. The return value(s) of workflow constructor function are the placeholders of the fields that are to be the outputs of the workflow.\n", - "\n", - "It is also possible to define new tasks to add to the workflow inline the constructor and type the inputs and outputs of the workflow." + "`workflow.add` returns an \"outputs\" object corresponding to the definition added to the\n", + "workflow. The fields of the outptus object can be referenced as inputs to downstream\n", + "workflow nodes. Note that these output fields are just placeholders for the values that will\n", + "be returned and can't be used in conditional statements during workflow construction\n", + "(see [Dynamic construction](../explanation/conditional-lazy.html) on how to work around this\n", + "limitation). The fields of the outputs to be returned by the workflow should be returned\n", + "in a tuple." 
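A minimal sketch of the pattern described above (hypothetical `Add`/`Mul` tasks, using the `pydra.design` decorators referred to elsewhere in these tutorials) might look like:

```python
from pydra.design import python, workflow

@python.define
def Add(a: float, b: float) -> float:
    return a + b

@python.define
def Mul(x: float, y: float) -> float:
    return x * y

@workflow.define
def AddThenMul(a: float, b: float, c: float):
    add = workflow.add(Add(a=a, b=b))        # `add.out` is a lazy placeholder, not a value
    mul = workflow.add(Mul(x=add.out, y=c))  # placeholders connect nodes together
    return (mul.out,)                        # workflow outputs are returned in a tuple
```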
] }, { diff --git a/notebooks/examples b/notebooks/examples new file mode 120000 index 0000000000..9c255e151f --- /dev/null +++ b/notebooks/examples @@ -0,0 +1 @@ +../new-docs/source/examples \ No newline at end of file diff --git a/notebooks/tutorial b/notebooks/tutorial new file mode 120000 index 0000000000..7d3c73de2d --- /dev/null +++ b/notebooks/tutorial @@ -0,0 +1 @@ +../new-docs/source/tutorial \ No newline at end of file diff --git a/pydra/design/__init__.py b/pydra/design/__init__.py deleted file mode 100644 index 0cfe94caa9..0000000000 --- a/pydra/design/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from . import python -from . import shell -from . import workflow - - -__all__ = ["python", "shell", "workflow"] diff --git a/pyproject.toml b/pyproject.toml index 69a79f9b6e..549b88a5af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,26 @@ test = [ "boutiques", "pympler", ] -jupyter = ["nest_asyncio"] +tutorial = [ + "fileformats-medimage-extras", + "jupyter", + "jupyter_contrib_nbextensions", + "jupytext", + "jupyterlab", + "matplotlib", + "nest_asyncio", + "nbformat", + "nbval", + "nibabel", + "nilearn", + "numpy", + "pandas", + "pydra-mrtrix3", + "psutil", + "pytest", + "scipy", + "sh", +] # Aliases tests = ["pydra[test]"] docs = ["pydra[doc]"] From e6c488f78c8da9ca78cce160edbc8533bbcfe01d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 7 Jan 2025 14:40:30 +1100 Subject: [PATCH 132/342] renamed tutorials to give natural order --- new-docs/source/index.rst | 16 ++++++++-------- ...ing-started.ipynb => 1-getting-started.ipynb} | 0 ...xecution.ipynb => 2-advanced-execution.ipynb} | 2 +- .../tutorial/{python.ipynb => 3-python.ipynb} | 0 .../tutorial/{shell.ipynb => 4-shell.ipynb} | 0 .../{workflow.ipynb => 5-workflow.ipynb} | 0 ...nonical-form.ipynb => 6-canonical-form.ipynb} | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) rename new-docs/source/tutorial/{getting-started.ipynb => 1-getting-started.ipynb} (100%) rename new-docs/source/tutorial/{advanced-execution.ipynb => 2-advanced-execution.ipynb} (99%) rename new-docs/source/tutorial/{python.ipynb => 3-python.ipynb} (100%) rename new-docs/source/tutorial/{shell.ipynb => 4-shell.ipynb} (100%) rename new-docs/source/tutorial/{workflow.ipynb => 5-workflow.ipynb} (100%) rename new-docs/source/tutorial/{canonical-form.ipynb => 6-canonical-form.ipynb} (98%) diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 20f2883f34..0831f73f4b 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -15,10 +15,10 @@ multiparameter map-reduce operations, in Python code and the use of a global cac Pydra's key features are: -* Combine diverse tasks, implemented in `Python functions <./tutorial/python.html>`__ or stand-alone `shell commands <./tutorial/shell.html>`__, into coherent `workflows <./tutorial/workflow.html>`__ +* Combine diverse tasks, implemented in `Python functions <./tutorial/3-python.html>`__ or stand-alone `shell commands <./tutorial/4-shell.html>`__, into coherent `workflows <./tutorial/5-workflow.html>`__ * Map-reduce like semantics (see :ref:`Splitting and combining`) * Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) -* Modular execution systems for varied deployment on cloud, HPC, etc... (see `Execution options <./tutorial/advanced-execution.html>`__) +* Modular execution systems for varied deployment on cloud, HPC, etc... 
(see `Execution options <./tutorial/2-advanced-execution.html>`__) * Support for the execution of tasks in containerized environments (see :ref:`Containers and environments`) * Global caching to reduce recomputation (see :ref:`Hashing and caching`) * Support for strong type-checking, including file types, at workflow construction time (see :ref:`Typing and file-formats`) @@ -112,18 +112,18 @@ See the full reference documentation for Pydra :caption: Tutorials: Execution :hidden: - tutorial/getting-started - tutorial/advanced-execution + tutorial/1-getting-started + tutorial/2-advanced-execution .. toctree:: :maxdepth: 2 :caption: Tutorials: Design :hidden: - tutorial/python - tutorial/shell - tutorial/workflow - tutorial/canonical-form + tutorial/3-python + tutorial/4-shell + tutorial/5-workflow + tutorial/6-canonical-form .. toctree:: diff --git a/new-docs/source/tutorial/getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb similarity index 100% rename from new-docs/source/tutorial/getting-started.ipynb rename to new-docs/source/tutorial/1-getting-started.ipynb diff --git a/new-docs/source/tutorial/advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb similarity index 99% rename from new-docs/source/tutorial/advanced-execution.ipynb rename to new-docs/source/tutorial/2-advanced-execution.ipynb index f31e66c6d1..ddb473ef32 100644 --- a/new-docs/source/tutorial/advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -43,7 +43,7 @@ "inputs then the location of its output directory will also be the same, and the outputs\n", "generated by the previous run are reused.\n", "\n", - "For example, using the MrGrid example from the [Getting Started Tutorial](./getting-started.html)\n" + "For example, using the MrGrid example from the [Getting Started Tutorial](./1-getting-started.html)\n" ] }, { diff --git a/new-docs/source/tutorial/python.ipynb b/new-docs/source/tutorial/3-python.ipynb similarity index 100% rename from new-docs/source/tutorial/python.ipynb rename to new-docs/source/tutorial/3-python.ipynb diff --git a/new-docs/source/tutorial/shell.ipynb b/new-docs/source/tutorial/4-shell.ipynb similarity index 100% rename from new-docs/source/tutorial/shell.ipynb rename to new-docs/source/tutorial/4-shell.ipynb diff --git a/new-docs/source/tutorial/workflow.ipynb b/new-docs/source/tutorial/5-workflow.ipynb similarity index 100% rename from new-docs/source/tutorial/workflow.ipynb rename to new-docs/source/tutorial/5-workflow.ipynb diff --git a/new-docs/source/tutorial/canonical-form.ipynb b/new-docs/source/tutorial/6-canonical-form.ipynb similarity index 98% rename from new-docs/source/tutorial/canonical-form.ipynb rename to new-docs/source/tutorial/6-canonical-form.ipynb index 4ff8b93ec8..4472f4d9c0 100644 --- a/new-docs/source/tutorial/canonical-form.ipynb +++ b/new-docs/source/tutorial/6-canonical-form.ipynb @@ -10,7 +10,7 @@ "`pydra.design.*.define` decorators/functions are translated to\n", "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", - "in the [Python-tasks](./python.html), [Shell-tasks](./shell.html) and [Workflow](./workflow.html)\n", + "in the [Python-tasks](./3-python.html), [Shell-tasks](./4-shell.html) and [Workflow](./5-workflow.html)\n", "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", "for linters follow. 
Therefore, when designing task definitions to be used by third\n", "parties (e.g. `pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", From 88a05bb826852b1046cafa3415f2c6a6cc8b152a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 7 Jan 2025 19:32:24 +1100 Subject: [PATCH 133/342] filled out explanation sections with subheadings --- .../source/explanation/conditional-lazy.rst | 35 +- new-docs/source/explanation/environments.rst | 33 +- .../source/explanation/hashing-caching.rst | 61 ++- new-docs/source/explanation/typing.rst | 71 ++- .../source/howto/create-task-package.ipynb | 4 +- new-docs/source/howto/port-from-nipype.ipynb | 4 +- new-docs/source/index.rst | 10 +- .../tutorial/2-advanced-execution.ipynb | 510 +++++++++--------- 8 files changed, 447 insertions(+), 281 deletions(-) diff --git a/new-docs/source/explanation/conditional-lazy.rst b/new-docs/source/explanation/conditional-lazy.rst index 22c1b148bd..85178a6653 100644 --- a/new-docs/source/explanation/conditional-lazy.rst +++ b/new-docs/source/explanation/conditional-lazy.rst @@ -1,4 +1,37 @@ Dynamic construction ==================== -Work in progress... +Pydra workflows are constructed dynamically by workflow "constructor" functions. These +functions can use any valid Python code, allowing rich and complex workflows to be +constructed based on the inputs to the workflow. For example, a workflow constructor +could include conditional branches, loops, or other control flow structures, to tailor +the workflow to the specific inputs provided. + + +Lazy fields +----------- + +Pydra workflows are constructed by the assignment of "lazy field" placeholders from +the outputs of upstream nodes to the inputs of downstream nodes. These placeholders, +which are instances of the :class:`pydra.engine.specs.LazyField` class, are replaced +by the actual values they represent when the workflow is run. + + +Caching of workflow construction +-------------------------------- + +Workflows are constructed just before they are executed to produce a Directed Acyclic Graph +(DAG) of nodes. Tasks are generated from these nodes as upstream inputs become available +and added to the execution stack. If the workflow has been split, either at the top-level, +in an upstream node or at the current node, then a separate task will be generated for +split. + + +Nested workflows and lazy conditionals +-------------------------------------- + +Since lazy fields are only evaluated at runtime, they can't be used in conditional +statements that construct the workflow. However, if there is a section of a workflow +that needs to be conditionally included or excluded based on upstream outputs, that +section can be implemented in a nested workflow and that upstream be connected to the +nested workflow. diff --git a/new-docs/source/explanation/environments.rst b/new-docs/source/explanation/environments.rst index 778b2bc35a..f5f1009c00 100644 --- a/new-docs/source/explanation/environments.rst +++ b/new-docs/source/explanation/environments.rst @@ -1,4 +1,33 @@ -Containers and environments -=========================== +Software environments +===================== + +Pydra supports running tasks within encapsulated software environments, such as Docker_ +and Singularity_ containers. This can be specified at runtime or during workflow +construction, and allows tasks to be run in environments that are isolated from the +host system, and that have specific software dependencies. 
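As a rough sketch of the dynamic construction described above (the `SmoothOnce` task and the `name` argument to `workflow.add()` are illustrative assumptions, not taken from this patch), ordinary Python control flow can be used while the graph is built:

```python
from fileformats.generic import File
from pydra.design import python, workflow

@python.define
def SmoothOnce(in_image: File) -> File:
    # stand-in for a real single-pass smoothing task; simply passes the image through
    return in_image

@workflow.define
def Smooth3(in_image: File):
    current = in_image
    # a plain Python loop decides how many nodes are added to the graph
    for i in range(3):
        node = workflow.add(SmoothOnce(in_image=current), name=f"smooth{i}")
        current = node.out
    return (current,)
```

Conditionals that depend on the values produced by upstream nodes cannot be written this way, since those values are still lazy placeholders at construction time; as the text above explains, such sections belong in a nested workflow connected to the upstream output.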
+ +The environment a task runs within is specified by the ``environment`` argument passed +to the execution call (e.g. ``my_task(plugin="cf", environment="docker")``) or in the +``workflow.add()`` call in workflow constructors. + +Specifying at execution +----------------------- + +Work in progress... + + +Specifying at workflow construction +----------------------------------- Work in progress... + + + +Implementing new environment types +---------------------------------- + +Work in progress... + + +.. _Docker: https://www.docker.com/ +.. _Singularity: https://sylabs.io/singularity/ diff --git a/new-docs/source/explanation/hashing-caching.rst b/new-docs/source/explanation/hashing-caching.rst index 4f7e561c5d..3edbd434f5 100644 --- a/new-docs/source/explanation/hashing-caching.rst +++ b/new-docs/source/explanation/hashing-caching.rst @@ -1,4 +1,61 @@ -Hashing and caching -=================== +Caches and hashes +================= + +In Pydra, each task is run within its own working directory. If a task completes +successfully, their outputs are stored within this working directory. Working directories +are created within a cache directory, which is specified when the task is executed, and +named according to the hash of the task's inputs. This means that if the same task is +executed with the same inputs, the same working directory will be used, and instead of the task +being rerun, the outputs from the previous run will be reused. + +In this manner, incomplete workflows can be resumed from where they left off, and completed +workflows can be rerun without having to rerun all of the tasks. This is particularly useful +when working with datasets that are to be analysed in several different ways with +common intermediate steps, or when debugging workflows that have failed part way through. + + +Hash calculations +----------------- + +Hashes are calculated for different types of objects in different ways. For example, the +hash of a string is simply the hash of the string itself, whereas the hash of a dictionary +is the hash of all the file names and contents within the directory. Implementations for +most common types are provided in the :mod:`pydra.utils.hash` module, but custom types +can be hashed by providing a custom ``bytes_repr`` function (see +:ref:`Registering custom bytes_repr functions`). + +A cache dictionary, is passed each ``bytes_repr`` call that maps an objects id (i.e. +as returned by the built-in ``id()`` function) to the hash, to avoid infinite recursions +in the case of circular references. + +The byte representation of each object is hashed using the BlakeB cryptographic algorithm, +and these hashes are then combined to create a hash of the entire inputs object. + + +File hash caching by mtime +-------------------------- + +To avoid having to recalculate the hash of large files between runs, file hashes themselves +are cached in a platform specific user directory. These hashes are stored within small +files named by yet another hash of the file-system path an mtime of the file. This means that +the contents of a file should only need to be hashed once unless it is modified. + +.. note:: + + Due to limitations in mtime resolution on different platforms (e.g. 1 second on Linux, + potentially 2 seconds on Windows), it is conceivable that a file could be modified, + hashed, and then modified again within resolution period, causing the hash to be + invalid. 
Therefore, cached hashes are only used once the mtime resolution period + has lapsed since it was last modified, and may be recalculated in some rare cases. + + +Registering custom bytes_repr functions +--------------------------------------- + +Work in progress... + + +Cache misses due to unstable hashes +----------------------------------- Work in progress... diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst index 685346f373..12a477a4e0 100644 --- a/new-docs/source/explanation/typing.rst +++ b/new-docs/source/explanation/typing.rst @@ -1,14 +1,17 @@ Typing and file-formats ======================= -Work in progress... +Pydra implements strong(-ish) type-checking at workflow construction time so some errors +can be caught before workflows are run on potentially expensive computing resources. +Input and output fields of tasks can be typed using Python annotations. +Unlike how they are typically used, in Pydra these type annotations are not just for +documentation and linting purposes, but are used to enforce the types of the inputs +and outputs of tasks and workflows at workflow construction and runtime. -Pydra implements strong(-ish) type-checking at workflow construction time, which can -include file types. - -Coercion --------- +.. note:: + With the exception of fields containing file-system paths, which should be typed + a FileFormats_ class, types don't need to be specified if not desired. File formats ------------ @@ -18,18 +21,58 @@ files, by the extensible collection of file format classes. These classes can be used to specify the format of a file in a task input or output, and can be used to validate the format of a file at runtime. -It is important to use a FileFormats_ type, when specifying fields that represent -a path to an existing file-system object (In most cases, it is sufficient to use the generic ``fileformats.generic.File``, -``fileformats.generic.File``, class +It is important to use a FileFormats_ type instead of a ``str`` or ``pathlib.Path``, +when defining a field that take paths to file-system objects, because otherwise only +the file path, not the file contents, will be used in the hash used to locate the cache +(see :ref:`Caches and hashes`). However, in most cases, it is sufficient to use the +generic ``fileformats.generic.File``, ``fileformats.generic.Directory``, or the even +more generic ``fileformats.generic.FsObject`` or ``fileformats.generic.FileSet`` classes. + +The only cases where it isn't sufficient to use generic classes, is when there are +implicit header or side cars assumed to be present adjacent to the primary file (e.g. +a NIfTI file with an associated JSON sidecar file). Because in these cases, the +header/sidecar file(s) will not be included in the hash calculation and may not be included in the +movement of the "file set" between working directories. In these cases, you need to use the +specific file format classes, such as ``fileformats.nifti.NiftiGzX``, which will check +to see if the header/sidecar files are present. + +Coercion +-------- + +Pydra will attempt to coerce the input to the correct type if it is not already, for example +if a tuple is provided to a field that is typed as a list, Pydra will convert the tuple to a list +before the task is run. 
By default the following coercions will be automatically +applied between the following types: + +* ty.Sequence → ty.Sequence +* ty.Mapping → ty.Mapping +* Path → os.PathLike +* str → os.PathLike +* os.PathLike → Path +* os.PathLike → str +* ty.Any → MultiInputObj +* int → float +* field.Integer → float +* int → field.Decimal + +In addition to this, ``fileformats.fields.Singular`` (see FileFormats_) +can be coerced to and from their primitive types and Numpy ndarrays and primitive types +can be coerced to and from Python sequences and built-in types, respectively. Superclass auto-casting ----------------------- -Not wanting the typing to get in the way by being unnecessarily strict, -upstream fields that are typed as super classes (or as ``typing.Any`` by default) -of the task input they are connected to will be automatically cast to the subclass -when the task is run. This allows workflows and tasks to be easily connected together -regardless of how specific typing is defined in the task definition. +Pydra is designed so that strict and specific typing can be used, but is not +unnecessarily strict, if it proves too burdensome. Therefore, upstream fields that are +typed as super classes (or as ``typing.Any`` by default) of the task input they are +connected to will be automatically cast to the subclass when the task is run. +This allows workflows and tasks to be easily connected together +regardless of how specific typing is defined in the task definition. This includes +file format types, so a task that expects a ``fileformats.medimage.NiftiGz`` file can +be connected to a task that outputs a ``fileformats.generic.File`` file. +Therefore, the only cases where a typing error will be raised are when the upstream +field can't be cast or coered to the downstream field, e.g. a ``fileformats.medimage.DicomSeries`` +cannot be cast to a ``fileformats.medimage.Nifti`` file. .. _FileFormats: https://arcanaframework.github.io/fileformats diff --git a/new-docs/source/howto/create-task-package.ipynb b/new-docs/source/howto/create-task-package.ipynb index 39aec84713..1b491cbc37 100644 --- a/new-docs/source/howto/create-task-package.ipynb +++ b/new-docs/source/howto/create-task-package.ipynb @@ -4,7 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Create a task package" + "# Create a task package\n", + "\n", + "Work in progress..." ] }, { diff --git a/new-docs/source/howto/port-from-nipype.ipynb b/new-docs/source/howto/port-from-nipype.ipynb index 3f673cd56e..ba228e387c 100644 --- a/new-docs/source/howto/port-from-nipype.ipynb +++ b/new-docs/source/howto/port-from-nipype.ipynb @@ -4,7 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Port interfaces from Nipype" + "# Port interfaces from Nipype\n", + "\n", + "Work in progress..." ] }, { diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 0831f73f4b..e8c330b0c4 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -13,14 +13,14 @@ The power of Pydra lies in ease of constructing workflows, containing complex multiparameter map-reduce operations, in Python code and the use of a global cache (see :ref:`Design philosophy` for the rationale behind its design). 
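A small sketch of the coercion and auto-casting behaviour described in the typing explanation above (the task definitions are hypothetical; only the types and the casting rule are taken from the text):

```python
from fileformats.generic import File
from fileformats.medimage import NiftiGz
from pydra.design import python, workflow

@python.define
def Locate(path: str) -> File:           # output typed with the generic File class
    return File(path)                     # stand-in for a task that produces a file

@python.define
def ReadHeader(image: NiftiGz) -> str:   # input typed with a specific format
    return str(image.fspath)              # stand-in for real header parsing

@workflow.define
def HeaderOf(path: str):
    located = workflow.add(Locate(path=path))
    # the generic File output is auto-cast to NiftiGz when the task runs; an error
    # is only raised if the upstream type can't be cast or coerced to the input type
    header = workflow.add(ReadHeader(image=located.out))
    return (header.out,)
```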
-Pydra's key features are: +**Key features**: -* Combine diverse tasks, implemented in `Python functions <./tutorial/3-python.html>`__ or stand-alone `shell commands <./tutorial/4-shell.html>`__, into coherent `workflows <./tutorial/5-workflow.html>`__ +* Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent `workflows <./tutorial/5-workflow.html>`__ * Map-reduce like semantics (see :ref:`Splitting and combining`) * Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) -* Modular execution systems for varied deployment on cloud, HPC, etc... (see `Execution options <./tutorial/2-advanced-execution.html>`__) -* Support for the execution of tasks in containerized environments (see :ref:`Containers and environments`) -* Global caching to reduce recomputation (see :ref:`Hashing and caching`) +* Modular backends for deployment on different execution platforms (e.g. cloud, HPC, etc...) (see `Execution options <./tutorial/2-advanced-execution.html>`__) +* Support for the execution of tasks in containerized environments (see :ref:`Software environments`) +* Global caching to reduce recomputation (see :ref:`Caches and hashes`) * Support for strong type-checking, including file types, at workflow construction time (see :ref:`Typing and file-formats`) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index ddb473ef32..7482cd16da 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -1,257 +1,257 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Execution options\n", - "\n", - "One of the key design features of Pydra is the separation between the parameterisation of\n", - "the task to be executed, and the parameresiation of where and how the task should be\n", - "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", - "some of the available options for executing a task.\n", - "\n", - "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workers\n", - "\n", - "Pydra supports several workers with which to execute tasks\n", - "\n", - "- `ConcurrentFutures`\n", - "- `SLURM`\n", - "- `Dask` (experimental)\n", - "- `Serial` (for debugging)\n", - "\n", - "Work in progress..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Cache locations\n", - "\n", - "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", - "task and the operation to be performed. This hash is used to name the output directory for\n", - "the task within the specified cache directory. 
Therefore, if you use the same cache\n", - "directory between runs and in a subsequent run the same task is executed with the same\n", - "inputs then the location of its output directory will also be the same, and the outputs\n", - "generated by the previous run are reused.\n", - "\n", - "For example, using the MrGrid example from the [Getting Started Tutorial](./1-getting-started.html)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Auto-generated, do not edit\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2gmwmi_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Gmwmi\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2vis_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Vis\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivettcheck_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FivettCheck\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/fivett2gmwmi_.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m File, Directory \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage_mrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ImageIn, ImageOut, Tracks \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m specs, ShellCommandTask\n\u001b[1;32m 10\u001b[0m input_fields \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Arguments\u001b[39;00m\n\u001b[1;32m 12\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 104\u001b[0m ),\n\u001b[1;32m 105\u001b[0m ]\n\u001b[1;32m 107\u001b[0m Fivett2GmwmiInputSpec \u001b[38;5;241m=\u001b[39m specs\u001b[38;5;241m.\u001b[39mSpecInfo(\n\u001b[1;32m 108\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFivett2GmwmiInput\u001b[39m\u001b[38;5;124m\"\u001b[39m, fields\u001b[38;5;241m=\u001b[39minput_fields, bases\u001b[38;5;241m=\u001b[39m(specs\u001b[38;5;241m.\u001b[39mShellSpec,)\n\u001b[1;32m 109\u001b[0m )\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "import tempfile\n", - "from fileformats.medimage import Nifti\n", - "from pydra.engine.submitter import Submitter\n", - "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", - "import nest_asyncio\n", - "\n", - "# Allow running async code in Jupyter notebooks\n", - "nest_asyncio.apply()\n", - "\n", - "# Make directory filled with nifti files\n", - "test_dir = Path(tempfile.mkdtemp())\n", - "nifti_dir = test_dir / \"nifti\"\n", - "nifti_dir.mkdir()\n", - "for i in range(10):\n", - " Nifti.sample(nifti_dir, seed=i)\n", - "\n", - "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", - "\n", - "mrgrid_varying_vox_sizes = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Run the task to resample all NIfTI files with different voxel sizes\n", - "with submitter:\n", - " result1 = submitter(mrgrid_varying_vox_sizes)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we attempt to run the same task with the same parameterisation the cache directory\n", - "will point to the same location and the results will be reused" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'MrGrid' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mrgrid_varying_vox_sizes2 \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 2\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 4\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m submitter \u001b[38;5;241m=\u001b[39m Submitter(cache_dir\u001b[38;5;241m=\u001b[39mtest_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 
9\u001b[0m \u001b[38;5;66;03m# Result from previous run is reused as the task and inputs are identical\u001b[39;00m\n", - "\u001b[0;31mNameError\u001b[0m: name 'MrGrid' is not defined" - ] - } - ], - "source": [ - "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result2 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result2.output_dir == result1.output_dir\n", - "\n", - "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", - "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# The output directory will be different as the inputs are now different\n", - "assert result3.output_dir != result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for file objects, the contents of the files are used to calculate the hash\n", - "not their paths. Therefore, when inputting large files there might be some additional\n", - "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", - "shouldn't need to be recalculated unless they are modified). However, this makes the\n", - "hashes invariant to file-system movement. For example, changing the name of one of the\n", - "files in the nifti directory won't invalidate the hash." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'nifti_dir' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Rename a NIfTI file within the test directory\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m first_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 3\u001b[0m first_file\u001b[38;5;241m.\u001b[39mrename(first_file\u001b[38;5;241m.\u001b[39mwith_name(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst.nii.gz\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 5\u001b[0m mrgrid_varying_vox_sizes3 \u001b[38;5;241m=\u001b[39m MrGrid()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 6\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 8\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 9\u001b[0m )\n", - "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" - ] - } - ], - "source": [ - "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", - "\n", - "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result4 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result4.output_dir == result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See [Caching and hashing](../explanation/hashing-caching.html) for more details on how inputs\n", - "are hashed for caching and issues to consider." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environments (containers)\n", - "\n", - "Work in progress...\n", - "\n", - "See [Containers and Environments](../explanation/environments.rst) for more details." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Provenance and auditing\n", - "\n", - "Work in progress..." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Execution options\n", + "\n", + "One of the key design features of Pydra is the separation between the parameterisation of\n", + "the task to be executed, and the parameresiation of where and how the task should be\n", + "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", + "some of the available options for executing a task.\n", + "\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workers\n", + "\n", + "Pydra supports several workers with which to execute tasks\n", + "\n", + "- `ConcurrentFutures`\n", + "- `SLURM`\n", + "- `Dask` (experimental)\n", + "- `Serial` (for debugging)\n", + "\n", + "Work in progress..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cache locations\n", + "\n", + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the output directory for\n", + "the task within the specified cache directory. Therefore, if you use the same cache\n", + "directory between runs and in a subsequent run the same task is executed with the same\n", + "inputs then the location of its output directory will also be the same, and the outputs\n", + "generated by the previous run are reused.\n", + "\n", + "For example, using the MrGrid example from the [Getting Started Tutorial](./1-getting-started.html)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 
7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Auto-generated, do not edit\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2gmwmi_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Gmwmi\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2vis_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Vis\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivettcheck_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FivettCheck\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/fivett2gmwmi_.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m File, Directory \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage_mrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ImageIn, ImageOut, Tracks \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m specs, ShellCommandTask\n\u001b[1;32m 10\u001b[0m input_fields \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Arguments\u001b[39;00m\n\u001b[1;32m 12\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 104\u001b[0m ),\n\u001b[1;32m 105\u001b[0m ]\n\u001b[1;32m 107\u001b[0m Fivett2GmwmiInputSpec \u001b[38;5;241m=\u001b[39m specs\u001b[38;5;241m.\u001b[39mSpecInfo(\n\u001b[1;32m 108\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFivett2GmwmiInput\u001b[39m\u001b[38;5;124m\"\u001b[39m, fields\u001b[38;5;241m=\u001b[39minput_fields, bases\u001b[38;5;241m=\u001b[39m(specs\u001b[38;5;241m.\u001b[39mShellSpec,)\n\u001b[1;32m 109\u001b[0m )\n", + "\u001b[0;31mImportError\u001b[0m: cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import tempfile\n", + "from fileformats.medimage import Nifti\n", + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "import nest_asyncio\n", + "\n", + "# Allow running async code in Jupyter notebooks\n", + "nest_asyncio.apply()\n", + "\n", + "# Make directory filled with nifti files\n", + "test_dir = Path(tempfile.mkdtemp())\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "for i in range(10):\n", + " Nifti.sample(nifti_dir, seed=i)\n", + "\n", + "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", + "\n", + "mrgrid_varying_vox_sizes = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " 
input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Run the task to resample all NIfTI files with different voxel sizes\n", + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox_sizes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'MrGrid' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mrgrid_varying_vox_sizes2 \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 2\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 4\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m submitter \u001b[38;5;241m=\u001b[39m Submitter(cache_dir\u001b[38;5;241m=\u001b[39mtest_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Result from previous run is reused as the task and inputs are identical\u001b[39;00m\n", + "\u001b[0;31mNameError\u001b[0m: name 'MrGrid' is not defined" + ] + } + ], + "source": [ + "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result2.output_dir == result1.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result3.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. 
For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nifti_dir' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Rename a NIfTI file within the test directory\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m first_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 3\u001b[0m first_file\u001b[38;5;241m.\u001b[39mrename(first_file\u001b[38;5;241m.\u001b[39mwith_name(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst.nii.gz\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 5\u001b[0m mrgrid_varying_vox_sizes3 \u001b[38;5;241m=\u001b[39m MrGrid()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 6\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 8\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 9\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" + ] + } + ], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", + "\n", + "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", + " (\"input\", \"voxel\"),\n", + " input=nifti_dir.iterdir(),\n", + " voxel=VOXEL_SIZES\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox_sizes2)\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result4.output_dir == result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [Caches and hashes](../explanation/hashing-caching.html) for more details on how inputs\n", + "are hashed for caching and issues to consider." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environments\n", + "\n", + "Work in progress...\n", + "\n", + "See [Containers and Environments](../explanation/environments.rst) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provenance and auditing\n", + "\n", + "Work in progress..." 
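Based on the description in the software-environments explanation added earlier in this patch, a hedged sketch of selecting an environment might look like the following (the `"docker"` value is taken from that text; the exact environment objects available may differ while this section is a work in progress):

```python
from pydra.tasks.mrtrix3.v3_0 import MrGrid

mrgrid = MrGrid(voxel=(0.5, 0.5, 0.5)).split(input=nifti_dir.iterdir())

# select the software environment at execution time...
outputs = mrgrid(plugin="cf", environment="docker")

# ...or when a node is added inside a workflow constructor, e.g.
#   node = workflow.add(MrGrid(voxel=(0.5, 0.5, 0.5)), environment="docker")
```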
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } From efa668ad1608b2c4db39f3e7f8e9f526d4be2a5c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 9 Jan 2025 12:33:58 +1100 Subject: [PATCH 134/342] more docs touch ups --- new-docs/source/conf.py | 2 +- new-docs/source/explanation/typing.rst | 10 +- new-docs/source/index.rst | 52 +++---- .../source/tutorial/1-getting-started.ipynb | 141 ++++++++++-------- .../tutorial/2-advanced-execution.ipynb | 2 +- new-docs/source/tutorial/tst.py | 26 ++++ 6 files changed, 135 insertions(+), 98 deletions(-) create mode 100644 new-docs/source/tutorial/tst.py diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py index 3282fb2e66..abef948238 100644 --- a/new-docs/source/conf.py +++ b/new-docs/source/conf.py @@ -155,7 +155,7 @@ # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -html_title = "Pydra v{}".format(__version__) +html_title = "Pydra v{}".format(version) # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = 'Pydra v' diff --git a/new-docs/source/explanation/typing.rst b/new-docs/source/explanation/typing.rst index 12a477a4e0..82d47c7d35 100644 --- a/new-docs/source/explanation/typing.rst +++ b/new-docs/source/explanation/typing.rst @@ -30,11 +30,11 @@ more generic ``fileformats.generic.FsObject`` or ``fileformats.generic.FileSet`` The only cases where it isn't sufficient to use generic classes, is when there are implicit header or side cars assumed to be present adjacent to the primary file (e.g. -a NIfTI file with an associated JSON sidecar file). Because in these cases, the -header/sidecar file(s) will not be included in the hash calculation and may not be included in the -movement of the "file set" between working directories. In these cases, you need to use the -specific file format classes, such as ``fileformats.nifti.NiftiGzX``, which will check -to see if the header/sidecar files are present. +a NIfTI file `my_nifti.nii` with an associated JSON sidecar file `my_nifti.json`). +Because the header/sidecar file(s) will not be included in the hash calculation +by default and may be omitted if the "file set" is copied into a different work +directories. In such cases, a specific file format class, such as +``fileformats.nifti.NiftiGzX``, should be used instead. Coercion -------- diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index e8c330b0c4..2bba918457 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -3,25 +3,24 @@ Pydra ===== -Pydra is a lightweight, Python 3.11+ dataflow engine for computational graph construction, -manipulation, and distributed execution. Designed as a successor to Nipype_, -Pydra is a general-purpose engine that supports analytics in any scientific domain. -Pydra helps build reproducible, scalable, reusable scientific workflows -that combine Python functions and shell commands. +Pydra is a lightweight Python dataflow engine for scientific analysis. +Although designed as a successor to Nipype_, Pydra is supports analytics in any domain. 
+Pydra helps build reproducible, scalable, reusable workflows that link processing tasks +implemented in Python or shell commands to be executed on distributed compute platforms. -The power of Pydra lies in ease of constructing workflows, containing complex -multiparameter map-reduce operations, in Python code and the use of a global cache (see -:ref:`Design philosophy` for the rationale behind its design). +The power of Pydra lies in ease of constructing workflows containing complex +multiparameter map-reduce operations in Python code (see :ref:`Design philosophy` for +the rationale behind its design). -**Key features**: +**Key features:** * Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent `workflows <./tutorial/5-workflow.html>`__ -* Map-reduce like semantics (see :ref:`Splitting and combining`) +* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/2-advanced-execution.html#Workers>`__ * Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) -* Modular backends for deployment on different execution platforms (e.g. cloud, HPC, etc...) (see `Execution options <./tutorial/2-advanced-execution.html>`__) -* Support for the execution of tasks in containerized environments (see :ref:`Software environments`) +* Map-reduce-like semantics (see :ref:`Splitting and combining`) * Global caching to reduce recomputation (see :ref:`Caches and hashes`) -* Support for strong type-checking, including file types, at workflow construction time (see :ref:`Typing and file-formats`) +* Tasks can be executed in separate software environments, e.g. containers (see :ref:`Software environments`) +* Strong type-checking, including file types, before execution (see :ref:`Typing and file-formats`) Installation @@ -32,21 +31,20 @@ therefore, it is straightforward to install via pip for Python >= 3.11 .. code-block:: bash - $ pip install pydra + $ pip install pydra Pre-designed tasks are available under the `pydra.tasks.*` package namespace. These tasks -are implemented within separate packages that are typically specific to a given shell-command toolkit such as FSL_, AFNI_ or ANTs_, -or a collection of related tasks/workflows (e.g. `niworkflows`_). Pip can be used to -install these packages as well: - +are implemented within separate packages that are typically specific to a given +shell-command toolkit such as FSL_, AFNI_ or ANTs_, or a collection of related +tasks/workflows (e.g. `niworkflows`_). Pip can be used to install these packages as well: .. code-block:: bash - $ pip install pydra-fsl pydra-ants + $ pip install pydra-fsl pydra-ants -Of course, if you use Pydra to execute commands within toolkits, you will need to -either have those commands installed on the execution machine, or use containers -environments (see `Environments <../explanation/environments.html>`__) to run them. +Of course, if you use Pydra to execute commands within non-Python toolkits, you will +need to either have those commands installed on the execution machine, or use containers +to run them (see :ref:`Software environments`). Tutorials and notebooks @@ -55,10 +53,14 @@ Tutorials and notebooks The following tutorials provide a step-by-step guide to using Pydra. They can be read in any order, but it is recommended to start with :ref:`Getting started`. 
The tutorials are implemented as Jupyter notebooks, which can be downloaded and run locally -or run online using the |Binder| within each tutorial. +or run online using the |Binder| button within each tutorial. If you decide to download the notebooks and run locally, be sure to install the necessary -dependencies with ``pip install -e /path/to/your/pydra[tutorial]``. +dependencies with + +.. code-block:: bash + + $ pip install -e /path/to/your/pydra[tutorial] Execution @@ -67,7 +69,7 @@ Execution Learn how to execute existing tasks (including workflows) on different systems * :ref:`Getting started` -* :ref:`Execution options` +* :ref:`Advanced execution` Design ~~~~~~ diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index 803509f30b..c6b7d881ab 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -16,17 +16,11 @@ "Tasks can encapsulate Python functions, shell-commands or workflows constructed from\n", "task components.\n", "\n", - "## Running your first task\n", - "\n", - "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", - "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). To use a pre-defined task definition\n", + "## Preparation\n", "\n", - "* import the class from the `pydra.tasks.*` package it is in\n", - "* instantiate it with appropriate parameters\n", - "* \"call\" resulting object (i.e. `my_task(...)`) to execute it as you would a function \n", + "Before we get started, lets set up some test data to play with.\n", "\n", - "To demonstrate with an example of loading a JSON file with the\n", - "`pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" + "Here we create a sample JSON file in a temporary directory" ] }, { @@ -38,10 +32,6 @@ "from pathlib import Path\n", "from tempfile import mkdtemp\n", "import json\n", - "import nest_asyncio\n", - "\n", - "# Allow running async code in Jupyter notebooks\n", - "nest_asyncio.apply()\n", "\n", "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", "\n", @@ -51,6 +41,64 @@ " json.dump(JSON_CONTENTS, f)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we create a directory containing ten randomly generated [NIfTI](https://nifti.nimh.nih.gov/) files" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.medimage import Nifti\n", + "\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "\n", + "for i in range(10):\n", + " Nifti.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that when you run concurrent processes within a Jupyter notebook the following snippet\n", + "is also required" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Running your first task\n", + "\n", + "Pre-defined task definitions are installed under the `pydra.tasks.*` namespace by separate\n", + "task packages (e.g. `pydra-fsl`, `pydra-ants`, ...). 
To use a pre-defined task definition\n", + "\n", + "* import the class from the `pydra.tasks.*` package it is in\n", + "* instantiate it with appropriate parameters\n", + "* \"call\" resulting object (i.e. `my_task(...)`) to execute it as you would a function \n", + "\n", + "To demonstrate with an example of loading a JSON file with the\n", + "`pydra.tasks.common.LoadJson` task, we first create an example JSON file to test with" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -61,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -95,27 +143,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "not enough values to unpack (expected 3, got 2)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter() \u001b[38;5;28;01mas\u001b[39;00m submitter:\n\u001b[0;32m----> 4\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msubmitter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mload_json\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(result)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:62\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, runnable, cache_locations, rerun, environment)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(runnable, TaskDef):\n\u001b[1;32m 58\u001b[0m runnable \u001b[38;5;241m=\u001b[39m runnable\u001b[38;5;241m.\u001b[39mTask(\n\u001b[1;32m 59\u001b[0m runnable,\n\u001b[1;32m 60\u001b[0m cache_locations\u001b[38;5;241m=\u001b[39mcache_locations,\n\u001b[1;32m 61\u001b[0m )\n\u001b[0;32m---> 62\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmit_from_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrunnable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menvironment\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 65\u001b[0m PersistentCache()\u001b[38;5;241m.\u001b[39mclean_up()\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m runnable\u001b[38;5;241m.\u001b[39mresult()\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:316\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 314\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39msend(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 316\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mthrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_must_cancel:\n\u001b[1;32m 319\u001b[0m \u001b[38;5;66;03m# Task is cancelled right before coro stops.\u001b[39;00m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:98\u001b[0m, in \u001b[0;36mSubmitter.submit_from_call\u001b[0;34m(self, runnable, rerun, environment)\u001b[0m\n\u001b[1;32m 95\u001b[0m runnable\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# 2\u001b[39;00m\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexpand_runnable(runnable, wait\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, rerun\u001b[38;5;241m=\u001b[39mrerun) \u001b[38;5;66;03m# TODO\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:143\u001b[0m, in \u001b[0;36mSubmitter.expand_runnable\u001b[0;34m(self, runnable, wait, rerun)\u001b[0m\n\u001b[1;32m 138\u001b[0m futures\u001b[38;5;241m.\u001b[39madd(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun_el((task_pkl, runnable), rerun\u001b[38;5;241m=\u001b[39mrerun))\n\u001b[1;32m 140\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m wait 
\u001b[38;5;129;01mand\u001b[39;00m futures:\n\u001b[1;32m 141\u001b[0m \u001b[38;5;66;03m# if wait is True, we are at the end of the graph / state expansion.\u001b[39;00m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;66;03m# Once the remaining jobs end, we will exit `submit_from_call`\u001b[39;00m\n\u001b[0;32m--> 143\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mgather(\u001b[38;5;241m*\u001b[39mfutures)\n\u001b[1;32m 144\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;66;03m# pass along futures to be awaited independently\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:385\u001b[0m, in \u001b[0;36mTask.__wakeup\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__wakeup\u001b[39m(\u001b[38;5;28mself\u001b[39m, future):\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 385\u001b[0m \u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m 387\u001b[0m \u001b[38;5;66;03m# This may also be a cancellation.\u001b[39;00m\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__step(exc)\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 316\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:185\u001b[0m, in \u001b[0;36mConcurrentFuturesWorker.exec_as_coro\u001b[0;34m(self, runnable, rerun, environment)\u001b[0m\n\u001b[1;32m 181\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_in_executor(\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool, runnable\u001b[38;5;241m.\u001b[39m_run, rerun, environment\n\u001b[1;32m 183\u001b[0m )\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# it could be tuple that includes pickle files with tasks and inputs\u001b[39;00m\n\u001b[0;32m--> 185\u001b[0m ind, task_main_pkl, task_orig \u001b[38;5;241m=\u001b[39m runnable\n\u001b[1;32m 186\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_in_executor(\n\u001b[1;32m 187\u001b[0m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool, load_and_run, task_main_pkl, ind, rerun, environment\n\u001b[1;32m 188\u001b[0m )\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", - "\u001b[0;31mValueError\u001b[0m: not enough values to unpack (expected 3, got 2)" + "name": "stdout", + "output_type": "stream", + "text": [ + "Result(output=LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}), runtime=None, errored=False)\n" ] } ], @@ -152,34 +187,6 @@ "such as the sample ones generated by the code below" ] }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "ename": "FileExistsError", - "evalue": "[Errno 17] File exists: '/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnaqc3ee3/nifti'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileExistsError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 3\u001b[0m nifti_dir \u001b[38;5;241m=\u001b[39m test_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnifti\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 4\u001b[0m \u001b[43mnifti_dir\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmkdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m10\u001b[39m):\n\u001b[1;32m 7\u001b[0m Nifti\u001b[38;5;241m.\u001b[39msample(nifti_dir, seed\u001b[38;5;241m=\u001b[39mi) \u001b[38;5;66;03m# Create a dummy NIfTI file in the dest. directory\u001b[39;00m\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/pathlib.py:1311\u001b[0m, in \u001b[0;36mPath.mkdir\u001b[0;34m(self, mode, parents, exist_ok)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1308\u001b[0m \u001b[38;5;124;03mCreate a new directory at this given path.\u001b[39;00m\n\u001b[1;32m 1309\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1311\u001b[0m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmkdir\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1312\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 1313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m parents \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparent \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mself\u001b[39m:\n", - "\u001b[0;31mFileExistsError\u001b[0m: [Errno 17] File exists: '/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpnaqc3ee3/nifti'" - ] - } - ], - "source": [ - "from fileformats.medimage import Nifti\n", - "\n", - "nifti_dir = test_dir / \"nifti\"\n", - "nifti_dir.mkdir()\n", - "\n", - "for i in range(10):\n", - " Nifti.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. 
directory" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -190,18 +197,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "'MrGrid' object has no attribute 'split'", + "ename": "TypeError", + "evalue": "Task.__init__() missing 1 required positional argument: 'definition'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvoxel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;241;43m0.5\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m(\u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Run the task to resample all NIfTI files\u001b[39;00m\n\u001b[1;32m 7\u001b[0m outputs \u001b[38;5;241m=\u001b[39m mrgrid()\n", - "\u001b[0;31mAttributeError\u001b[0m: 'MrGrid' object has no attribute 'split'" + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m MrGrid(voxel\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Run the task to resample all NIfTI files\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmrgrid\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# Print the locations of the output files\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(p) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m outputs\u001b[38;5;241m.\u001b[39moutput))\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:299\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, name, audit_flags, cache_dir, cache_locations, messengers, messenger_args, rerun)\u001b[0m\n\u001b[1;32m 296\u001b[0m task_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mTask\n\u001b[1;32m 297\u001b[0m definition \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n\u001b[0;32m--> 299\u001b[0m task \u001b[38;5;241m=\u001b[39m 
\u001b[43mtask_type\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mdefinition\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 309\u001b[0m result \u001b[38;5;241m=\u001b[39m task()\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result\u001b[38;5;241m.\u001b[39merrored:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:708\u001b[0m, in \u001b[0;36mWorkflowTask.__init__\u001b[0;34m(self, definition, name, audit_flags, cache_dir, cache_locations, input_spec, cont_dim, messenger_args, messengers, output_spec, rerun, propagate_rerun, **kwargs)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 663\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 664\u001b[0m definition,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 678\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 679\u001b[0m ):\n\u001b[1;32m 680\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;124;03m Initialize a workflow.\u001b[39;00m\n\u001b[1;32m 682\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 705\u001b[0m \n\u001b[1;32m 706\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 708\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 709\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 710\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 711\u001b[0m \u001b[43m \u001b[49m\u001b[43mcont_dim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcont_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 712\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 713\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 714\u001b[0m \u001b[43m 
\u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 715\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 716\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 717\u001b[0m \u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 718\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph \u001b[38;5;241m=\u001b[39m DiGraph(name\u001b[38;5;241m=\u001b[39mname)\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname2obj \u001b[38;5;241m=\u001b[39m {}\n", + "\u001b[0;31mTypeError\u001b[0m: Task.__init__() missing 1 required positional argument: 'definition'" ] } ], diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 7482cd16da..c23ddd2da4 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Execution options\n", + "# Advanced Execution\n", "\n", "One of the key design features of Pydra is the separation between the parameterisation of\n", "the task to be executed, and the parameresiation of where and how the task should be\n", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py new file mode 100644 index 0000000000..b98e22c274 --- /dev/null +++ b/new-docs/source/tutorial/tst.py @@ -0,0 +1,26 @@ +from pathlib import Path +from tempfile import mkdtemp +from fileformats.medimage import Nifti +from pydra.tasks.mrtrix3.v3_0 import MrGrid + + +if __name__ == "__main__": + test_dir = Path(mkdtemp()) + + nifti_dir = test_dir / "nifti" + nifti_dir.mkdir() + + for i in range(10): + Nifti.sample( + nifti_dir, seed=i + ) # Create a dummy NIfTI file in the dest. 
directory + + # Instantiate the task definition, "splitting" over all NIfTI files in the test directory + # by splitting the "input" input field over all files in the directory + mrgrid = MrGrid(voxel=(0.5, 0.5, 0.5)).split(input=nifti_dir.iterdir()) + + # Run the task to resample all NIfTI files + outputs = mrgrid() + + # Print the locations of the output files + print("\n".join(str(p) for p in outputs.output)) From 9f7aa2b0c38b73fbac82e35afadcdf1ea1248820 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 14 Jan 2025 23:15:34 +1100 Subject: [PATCH 135/342] connecting workflow logic to new api syntax --- benchmark.py | 38 + example.py | 34 + new-docs/source/index.rst | 47 +- pydra/design/base.py | 8 +- pydra/design/boutiques.py | 2 - pydra/design/python.py | 2 - pydra/design/shell.py | 2 - pydra/design/workflow.py | 10 +- pydra/engine/core.py | 862 +++++++--------------- pydra/engine/graph.py | 6 + pydra/engine/helpers.py | 8 +- pydra/engine/{workflow => }/lazy.py | 4 + pydra/engine/{workflow => }/node.py | 281 +++---- pydra/engine/specs.py | 294 ++++++-- pydra/engine/state.py | 51 ++ pydra/engine/submitter.py | 417 ++++++++--- pydra/engine/task.py | 4 +- pydra/engine/tests/test_dockertask.py | 2 +- pydra/engine/tests/test_environments.py | 14 +- pydra/engine/tests/test_node_task.py | 72 +- pydra/engine/tests/test_numpy_examples.py | 4 +- pydra/engine/tests/test_shelltask.py | 22 +- pydra/engine/tests/test_singularity.py | 12 +- pydra/engine/tests/test_submitter.py | 12 +- pydra/engine/tests/test_tasks_files.py | 18 +- pydra/engine/tests/test_workflow.py | 314 ++++---- pydra/engine/tests/utils.py | 2 +- pydra/engine/workers.py | 122 +-- pydra/engine/workflow/__init__.py | 0 pydra/engine/workflow/base.py | 184 ----- 30 files changed, 1382 insertions(+), 1466 deletions(-) create mode 100644 benchmark.py create mode 100644 example.py rename pydra/engine/{workflow => }/lazy.py (98%) rename pydra/engine/{workflow => }/node.py (53%) delete mode 100644 pydra/engine/workflow/__init__.py delete mode 100644 pydra/engine/workflow/base.py diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000000..35f6de914d --- /dev/null +++ b/benchmark.py @@ -0,0 +1,38 @@ +import asyncio +import time + + +def sync_function(x): + return x * 2 + + +async def async_function(x): + return x * 2 + + +def benchmark_sync(): + start_time = time.time() + for _ in range(1000000): + sync_function(10) + end_time = time.time() + return end_time - start_time + + +async def benchmark_async(): + start_time = time.time() + for _ in range(1000000): + await async_function(10) + end_time = time.time() + return end_time - start_time + + +def main(): + sync_time = benchmark_sync() + print(f"Sync function time: {sync_time:.6f} seconds") + + async_time = asyncio.run(benchmark_async()) + print(f"Async function time: {async_time:.6f} seconds") + + +if __name__ == "__main__": + main() diff --git a/example.py b/example.py new file mode 100644 index 0000000000..dd9dc87a5f --- /dev/null +++ b/example.py @@ -0,0 +1,34 @@ +import asyncio + + +def is_coroutine_function(func): + return asyncio.iscoroutinefunction(func) + + +async def call_function(func, *args, **kwargs): + if is_coroutine_function(func): + return await func(*args, **kwargs) + else: + return func(*args, **kwargs) + + +# Example usage +async def async_function(x): + await asyncio.sleep(1) + return x * 2 + + +def sync_function(x): + return x * 2 + + +async def main(): + result1 = await call_function(async_function, 10) + result2 = await call_function(sync_function, 
10) + print(result1) # Output: 20 + print(result2) # Output: 20 + + +# To run the example +if __name__ == "__main__": + asyncio.run(main()) diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 2bba918457..39ad3065b3 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -3,40 +3,41 @@ Pydra ===== -Pydra is a lightweight Python dataflow engine for scientific analysis. -Although designed as a successor to Nipype_, Pydra is supports analytics in any domain. -Pydra helps build reproducible, scalable, reusable workflows that link processing tasks -implemented in Python or shell commands to be executed on distributed compute platforms. - -The power of Pydra lies in ease of constructing workflows containing complex -multiparameter map-reduce operations in Python code (see :ref:`Design philosophy` for -the rationale behind its design). +Pydra is a lightweight dataflow engine written in Python. Although designed to succeed +Nipype_ in order to address the needs of the neuroimaging community, Pydra can be used +for analytics in any scientific domain. Pydra facilitates the design of reproducible, +scalable and robust workflows that can link diverse processing tasks implemented as +shell commands or Python functions. **Key features:** -* Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent `workflows <./tutorial/5-workflow.html>`__ -* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/2-advanced-execution.html#Workers>`__ +* Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent, robust `workflows <./tutorial/5-workflow.html>`__ * Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) +* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/2-advanced-execution.html#Workers>`__ * Map-reduce-like semantics (see :ref:`Splitting and combining`) * Global caching to reduce recomputation (see :ref:`Caches and hashes`) * Tasks can be executed in separate software environments, e.g. containers (see :ref:`Software environments`) * Strong type-checking, including file types, before execution (see :ref:`Typing and file-formats`) +See :ref:`Design philosophy` for more details on the rationale behind Pydra's design. + Installation ------------ -Pydra itself is a pure-Python package, which has only a handful of dependencies, -therefore, it is straightforward to install via pip for Python >= 3.11 +Pydra is implemented purely in Python and has a small number of dependencies +It is easy to install via pip for Python >= 3.11 (preferably within a +`virtual environment`_): .. code-block:: bash $ pip install pydra -Pre-designed tasks are available under the `pydra.tasks.*` package namespace. These tasks -are implemented within separate packages that are typically specific to a given -shell-command toolkit such as FSL_, AFNI_ or ANTs_, or a collection of related -tasks/workflows (e.g. `niworkflows`_). Pip can be used to install these packages as well: +Pre-designed tasks are available under the `pydra.tasks.*` namespace. 
These tasks +are typically implemented within separate packages that are specific to a given +shell-command toolkit, such as FSL_ (*pydra-fsl*), AFNI_ (*pydra-afni*) or +ANTs_ (*pydra-ants*), or a collection of related tasks/workflows, such as Niworkflows +(*pydra-niworkflows*). Pip can be used to install these extension packages as well: .. code-block:: bash @@ -50,13 +51,15 @@ to run them (see :ref:`Software environments`). Tutorials and notebooks ----------------------- -The following tutorials provide a step-by-step guide to using Pydra. -They can be read in any order, but it is recommended to start with :ref:`Getting started`. -The tutorials are implemented as Jupyter notebooks, which can be downloaded and run locally +The following tutorials provide a step-by-step guide to using Pydra. They can be +studied in any order, but it is recommended to start with :ref:`Getting started` and +step through the list from there. + +The tutorials are written in Jupyter notebooks, which can be downloaded and run locally or run online using the |Binder| button within each tutorial. If you decide to download the notebooks and run locally, be sure to install the necessary -dependencies with +dependencies (ideally within a `virtual environment`_): .. code-block:: bash @@ -74,7 +77,8 @@ Learn how to execute existing tasks (including workflows) on different systems Design ~~~~~~ -Learn how to design your own tasks +Learn how to design your own tasks, wrapped shell commands or Python functions, or +workflows, * :ref:`Python-tasks` * :ref:`Shell-tasks` @@ -171,5 +175,6 @@ See the full reference documentation for Pydra .. _AFNI: https://afni.nimh.nih.gov/ .. _niworkflows: https://niworkflows.readthedocs.io/en/latest/ .. _Nipype: https://nipype.readthedocs.io/en/latest/ +.. _virtual environment: https://docs.python.org/3/library/venv.html .. |Binder| image:: https://mybinder.org/badge_logo.svg :target: https://mybinder.org/v2/gh/nipype/pydra/develop diff --git a/pydra/design/base.py b/pydra/design/base.py index 9bdfb1f832..c5fb817431 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -27,7 +27,7 @@ if ty.TYPE_CHECKING: from pydra.engine.specs import TaskDef, TaskOutputs - from pydra.engine.core import Task + __all__ = [ "Field", @@ -352,7 +352,6 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: def make_task_def( spec_type: type["TaskDef"], out_type: type["TaskOutputs"], - task_type: type["Task"], inputs: dict[str, Arg], outputs: dict[str, Out], klass: type | None = None, @@ -418,15 +417,12 @@ def make_task_def( name=name, bases=bases, kwds={}, - exec_body=lambda ns: ns.update( - {"Task": task_type, "Outputs": outputs_klass} - ), + exec_body=lambda ns: ns.update({"Outputs": outputs_klass}), ) else: # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes klass.__annotations__ = copy(klass.__annotations__) - klass.Task = task_type klass.Outputs = outputs_klass # Now that we have saved the attributes in lists to be for arg in inputs.values(): diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 8931877e44..8c1986c983 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -6,7 +6,6 @@ from functools import reduce from fileformats.generic import File from pydra.engine.specs import ShellDef -from pydra.engine.task import BoshTask from .base import make_task_def from . 
import shell @@ -115,7 +114,6 @@ def define( ) return make_task_def( spec_type=ShellDef, - task_type=BoshTask, out_type=out, arg_type=arg, inputs=inputs, diff --git a/pydra/design/python.py b/pydra/design/python.py index 8464c1c02b..433a189d65 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -121,7 +121,6 @@ def define( PythonDef The task definition class for the Python function """ - from pydra.engine.task import PythonTask from pydra.engine.specs import PythonDef, PythonOutputs def make(wrapped: ty.Callable | type) -> PythonDef: @@ -162,7 +161,6 @@ def make(wrapped: ty.Callable | type) -> PythonDef: interface = make_task_def( PythonDef, PythonOutputs, - PythonTask, parsed_inputs, parsed_outputs, name=name, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 3baad9c712..90aa168e2d 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -287,7 +287,6 @@ def define( ShellDef The interface for the shell command """ - from pydra.engine.task import ShellTask from pydra.engine.specs import ShellDef, ShellOutputs def make( @@ -376,7 +375,6 @@ def make( interface = make_task_def( ShellDef, ShellOutputs, - ShellTask, parsed_inputs, parsed_outputs, name=class_name, diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 53f76eba4a..841eebf1c0 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -14,7 +14,7 @@ ) if ty.TYPE_CHECKING: - from pydra.engine.workflow.base import Workflow + from pydra.engine.core import Workflow from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowDef @@ -128,7 +128,6 @@ def define( TaskDef The interface for the function or class. """ - from pydra.engine.core import WorkflowTask from pydra.engine.specs import TaskDef, WorkflowDef, WorkflowOutputs if lazy is None: @@ -174,7 +173,6 @@ def make(wrapped: ty.Callable | type) -> TaskDef: interface = make_task_def( WorkflowDef, WorkflowOutputs, - WorkflowTask, parsed_inputs, parsed_outputs, name=name, @@ -208,12 +206,12 @@ def this() -> "Workflow": OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") -def add(task_spec: "TaskDef[OutputsType]", name: str = None) -> OutputsType: +def add(task_def: "TaskDef[OutputsType]", name: str = None) -> OutputsType: """Add a node to the workflow currently being constructed Parameters ---------- - task_spec : TaskDef + task_def : TaskDef The definition of the task to add to the workflow as a node name : str, optional The name of the node, by default it will be the name of the task definition @@ -224,4 +222,4 @@ def add(task_spec: "TaskDef[OutputsType]", name: str = None) -> OutputsType: Outputs The outputs definition of the node """ - return this().add(task_spec, name=name) + return this().add(task_def, name=name) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index aa42e99e77..e1bae23c09 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -9,20 +9,27 @@ import typing as ty from copy import deepcopy from uuid import uuid4 -from filelock import SoftFileLock +import inspect import shutil -from tempfile import mkdtemp from traceback import format_exception import attr import cloudpickle as cp -from . import state -from . 
import helpers_state as hlpst +from copy import copy +from operator import itemgetter +from typing_extensions import Self +import attrs +from pydra.engine.specs import TaskDef, WorkflowDef, TaskOutputs, WorkflowOutputs +from pydra.engine.graph import DiGraph +from pydra.engine import state +from .lazy import LazyInField, LazyOutField +from pydra.utils.hash import hash_function +from pydra.utils.typing import TypeParser, StateArray +from .node import Node +from fileformats.generic import FileSet from .specs import ( - File, - RuntimeDef, + RuntimeSpec, Result, TaskHook, - TaskDef, ) from .helpers import ( create_checksum, @@ -37,19 +44,21 @@ list_fields, is_lazy, ) -from pydra.utils.hash import hash_function from .helpers_file import copy_nested_files, template_update -from .graph import DiGraph -from .audit import Audit from pydra.utils.messenger import AuditFlag -from fileformats.core import FileSet logger = logging.getLogger("pydra") develop = False +if ty.TYPE_CHECKING: + from pydra.engine.submitter import Submitter, NodeExecution + from pydra.design.base import Arg + +DefType = ty.TypeVar("DefType", bound=TaskDef) -class Task: + +class Task(ty.Generic[DefType]): """ A base structure for the nodes in the processing graph. @@ -71,29 +80,25 @@ class Task: _can_resume = False # Does the task allow resuming from previous state _redirect_x = False # Whether an X session should be created/directed - _runtime_requirements = RuntimeDef() + _runtime_requirements = RuntimeSpec() _runtime_hints = None _cache_dir = None # Working directory in which to operate _references = None # List of references for a task name: str - definition: TaskDef + definition: DefType + submitter: "Submitter" + state_index: state.StateIndex _inputs: dict[str, ty.Any] | None = None def __init__( self, - definition, - name: str | None = None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - inputs: ty.Optional[ty.Union[ty.Text, File, ty.Dict]] = None, - cont_dim=None, - messenger_args=None, - messengers=None, - rerun=False, + definition: DefType, + submitter: "Submitter", + name: str, + state_index: "state.StateIndex | None" = None, ): """ Initialize a task. 
@@ -137,29 +142,13 @@ def __init__( if Task._etelemetry_version_data is None: Task._etelemetry_version_data = check_latest_version() + if state_index is None: + state_index = state.StateIndex() + self.definition = definition self.name = name - - self.input_names = [ - field.name - for field in attr.fields(type(self.definition)) - if field.name not in ["_func", "_graph_checksums"] - ] - - if inputs: - if isinstance(inputs, dict): - # selecting items that are in input_names (ignoring fields that are not in input_spec) - inputs = {k: v for k, v in inputs.items() if k in self.input_names} - # TODO: this needs to finished and tested after #305 - elif Path(inputs).is_file(): - inputs = json.loads(Path(inputs).read_text()) - # TODO: this needs to finished and tested after #305 - elif isinstance(inputs, str): - if self._input_sets is None or inputs not in self._input_sets: - raise ValueError(f"Unknown input set {inputs!r}") - inputs = self._input_sets[inputs] - - self.definition = attr.evolve(self.definition, **inputs) + self.submitter = submitter + self.state_index = state_index # checking if metadata is set properly self.definition._check_resolved() @@ -171,19 +160,9 @@ def __init__( if self._input_sets is None: self._input_sets = {} - self.audit = Audit( - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - develop=develop, - ) - self.cache_dir = cache_dir - self.cache_locations = cache_locations self.allow_cache_override = True self._checksum = None self._uid = uuid4().hex - # if True the results are not checked (does not propagate to nodes) - self.task_rerun = rerun self.plugin = None self.hooks = TaskHook() @@ -277,20 +256,6 @@ def can_resume(self): def _run_task(self, environment=None): pass - @property - def cache_dir(self): - """Get the location of the cache directory.""" - return self._cache_dir - - @cache_dir.setter - def cache_dir(self, location): - if location is not None: - self._cache_dir = Path(location).resolve() - self._cache_dir.mkdir(parents=False, exist_ok=True) - else: - self._cache_dir = mkdtemp() - self._cache_dir = Path(self._cache_dir).resolve() - @property def cache_locations(self): """Get the list of cache sources.""" @@ -323,37 +288,6 @@ def cont_dim(self, cont_dim): else: self._cont_dim = cont_dim - def __call__( - self, - submitter=None, - plugin=None, - plugin_kwargs=None, - rerun=False, - environment=None, - **kwargs, - ): - """Make tasks callable themselves.""" - from .submitter import Submitter - - if submitter and plugin: - raise Exception("Defify submitter OR plugin, not both") - elif submitter: - pass - # if there is plugin provided or the task is a Workflow or has a state, - # the submitter will be created using provided plugin, self.plugin or "cf" - elif plugin: - plugin = plugin or self.plugin or "cf" - if plugin_kwargs is None: - plugin_kwargs = {} - submitter = Submitter(plugin=plugin, **plugin_kwargs) - - if submitter: - with submitter as sub: - res = sub(self, environment=environment) - else: # tasks without state could be run without a submitter - res = self._run(rerun=rerun, environment=environment, **kwargs) - return res - @property def inputs(self) -> dict[str, ty.Any]: """Resolve any template inputs of the task ahead of its execution: @@ -378,6 +312,7 @@ def inputs(self) -> dict[str, ty.Any]: if not k.startswith("_") } map_copyfiles = {} + fld: "Arg" for fld in list_fields(self.definition): name = fld.name value = self._inputs[name] @@ -417,43 +352,42 @@ def _populate_filesystem(self, checksum, output_dir): 
shutil.rmtree(output_dir) output_dir.mkdir(parents=False, exist_ok=self.can_resume) - def _run(self, rerun=False, environment=None): + async def run(self, submitter: "Submitter"): checksum = self.checksum output_dir = self.output_dir lockfile = self.cache_dir / (checksum + ".lock") - # Eagerly retrieve cached - see scenarios in __init__() self.hooks.pre_run(self) logger.debug("'%s' is attempting to acquire lock on %s", self.name, lockfile) - with SoftFileLock(lockfile): - if not (rerun or self.task_rerun): + async with PydraFileLock(lockfile): + if not (submitter.rerun): result = self.result() if result is not None and not result.errored: return result cwd = os.getcwd() self._populate_filesystem(checksum, output_dir) - os.chdir(output_dir) result = Result(output=None, runtime=None, errored=False) self.hooks.pre_run_task(self) self.audit.start_audit(odir=output_dir) - if self.audit.audit_check(AuditFlag.PROV): - self.audit.audit_task(task=self) try: self.audit.monitor() - self._run_task(environment=environment) + if inspect.iscoroutinefunction(self._run_task): + await self.definition._run(self, submitter) + else: + self.definition._run(self, submitter) result.output = self.definition.Outputs.from_task(self) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) record_error(output_dir, error=traceback) result.errored = True + self._errored = True raise finally: self.hooks.post_run_task(self, result) - self.audit.finalize_audit(result) + self.audit.finalize_audit(result=result) save(output_dir, result=result, task=self) # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() - # Restore original values to inputs os.chdir(cwd) self.hooks.post_run(self, result) # Check for any changes to the input hashes that have occurred during the execution @@ -461,46 +395,9 @@ def _run(self, rerun=False, environment=None): self._check_for_hash_changes() return result - def _extract_input_el(self, inputs, inp_nm, ind): - """ - Extracting element of the inputs taking into account - container dimension of the specific element that can be set in self.cont_dim. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. 
- If - """ - if f"{self.name}.{inp_nm}" in self.cont_dim: - return list( - hlpst.flatten( - ensure_list(getattr(inputs, inp_nm)), - max_depth=self.cont_dim[f"{self.name}.{inp_nm}"], - ) - )[ind] - else: - return getattr(inputs, inp_nm)[ind] - - def get_input_el(self, ind): - """Collect all inputs required to run the node (for specific state element).""" - # TODO: doesn't work properly for more cmplicated wf (check if still an issue) - input_ind = self.state.inputs_ind[ind] - inputs_dict = {} - for inp in set(self.input_names): - if f"{self.name}.{inp}" in input_ind: - inputs_dict[inp] = self._extract_input_el( - inputs=self.definition, - inp_nm=inp, - ind=input_ind[f"{self.name}.{inp}"], - ) - return inputs_dict - # else: - # # todo it never gets here - # breakpoint() - # inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} - # return None, inputs_dict - def pickle_task(self): """Pickling the tasks with full inputs""" - pkl_files = self.cache_dir / "pkl_files" + pkl_files = self.submitter.cache_dir / "pkl_files" pkl_files.mkdir(exist_ok=True, parents=True) task_main_path = pkl_files / f"{self.name}_{self.uid}_task.pklz" save(task_path=pkl_files, task=self, name_prefix=f"{self.name}_{self.uid}") @@ -602,15 +499,6 @@ def result(self, state_index=None, return_inputs=False): else: return result - def _reset(self): - """Reset the connections between inputs and LazyFields.""" - for field in attrs_fields(self.definition): - if field.name in self.inp_lf: - setattr(self.definition, field.name, self.inp_lf[field.name]) - if is_workflow(self): - for task in self.graph.nodes: - task._reset() - def _check_for_hash_changes(self): hash_changes = self.definition._hash_changes() details = "" @@ -656,449 +544,297 @@ def _check_for_hash_changes(self): DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any -class WorkflowTask(Task): - """A composite task with structure of computational graph.""" +logger = logging.getLogger("pydra") - def __init__( - self, - name, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - input_spec: ty.Optional[ - ty.Union[ty.List[ty.Text], ty.Dict[ty.Text, ty.Type[ty.Any]]] - ] = None, - cont_dim=None, - messenger_args=None, - messengers=None, - output_spec: ty.Optional[ty.Union[ty.List[str], ty.Dict[str, type]]] = None, - rerun=False, - propagate_rerun=True, - **kwargs, - ): - """ - Initialize a workflow. +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) +WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) - Parameters - ---------- - name : :obj:`str` - Unique name of this node - audit_flags : :class:`AuditFlag`, optional - Configure provenance tracking. Default is no provenance tracking. - See available flags at :class:`~pydra.utils.messenger.AuditFlag`. - cache_dir : :obj:`os.pathlike` - Set a custom directory of previously computed nodes. - cache_locations : - TODO - inputs : :obj:`typing.Text`, or :class:`File`, or :obj:`dict`, or `None`. - Set particular inputs to this node. 
- cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO - output_spec : - TODO - """ +@attrs.define(auto_attribs=False) +class Workflow(ty.Generic[WorkflowOutputsType]): + """A workflow, constructed from a workflow definition - if name in dir(self): + Parameters + ---------- + name : str + The name of the workflow + inputs : TaskDef + The input definition of the workflow + outputs : TaskDef + The output definition of the workflow + """ + + name: str = attrs.field() + inputs: WorkflowDef[WorkflowOutputsType] = attrs.field() + outputs: WorkflowOutputsType = attrs.field() + _nodes: dict[str, Node] = attrs.field(factory=dict) + + @classmethod + def construct( + cls, + definition: WorkflowDef[WorkflowOutputsType], + ) -> Self: + """Construct a workflow from a definition, caching the constructed worklow""" + + lazy_inputs = [f for f in list_fields(type(definition)) if f.lazy] + + # Create a cache key by hashing all the non-lazy input values in the definition + # and use this to store the constructed workflow in case it is reused or nested + # and split over within another workflow + lazy_input_names = {f.name for f in lazy_inputs} + non_lazy_vals = tuple( + sorted( + ( + i + for i in attrs_values(definition).items() + if i[0] not in lazy_input_names + ), + key=itemgetter(0), + ) + ) + if lazy_non_lazy_vals := [f for f in non_lazy_vals if is_lazy(f[1])]: raise ValueError( - "Cannot use names of attributes or methods as workflow name" + f"Lazy input fields {lazy_non_lazy_vals} found in non-lazy fields " ) - self.name = name + hash_key = hash_function(non_lazy_vals) + if hash_key in cls._constructed: + return cls._constructed[hash_key] - super().__init__( - name=name, - inputs=kwargs, - cont_dim=cont_dim, - cache_dir=cache_dir, - cache_locations=cache_locations, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - rerun=rerun, + # Initialise the outputs of the workflow + outputs = definition.Outputs( + **{f.name: attrs.NOTHING for f in attrs.fields(definition.Outputs)} ) - self.graph = DiGraph(name=name) - self.name2obj = {} - self._lzin = None - self._pre_split = ( - False # To signify if the workflow has been split on task load or not + # Initialise the lzin fields + lazy_spec = copy(definition) + wf = cls.under_construction = Workflow( + name=type(definition).__name__, + inputs=lazy_spec, + outputs=outputs, ) + for lzy_inpt in lazy_inputs: + setattr( + lazy_spec, + lzy_inpt.name, + LazyInField( + workflow=wf, + field=lzy_inpt.name, + type=lzy_inpt.type, + ), + ) - # store output connections - self._connections = None - # propagating rerun if task_rerun=True - self.propagate_rerun = propagate_rerun - - def __getattr__(self, name): - if name in self.name2obj: - return self.name2obj[name] - return self.__getattribute__(name) - - @property - def nodes(self): - """Get the list of node names.""" - return self.name2obj.values() + input_values = attrs_values(lazy_spec) + constructor = input_values.pop("constructor") + cls._under_construction = wf + try: + # Call the user defined constructor to set the outputs + output_lazy_fields = constructor(**input_values) + # Check to see whether any mandatory inputs are not set + for node in wf.nodes: + node._spec._check_rules() + # Check that the outputs are set correctly, either directly by the constructor + # or via returned values that can be zipped with the output names + if output_lazy_fields: + if 
not isinstance(output_lazy_fields, (list, tuple)): + output_lazy_fields = [output_lazy_fields] + output_fields = list_fields(definition.Outputs) + if len(output_lazy_fields) != len(output_fields): + raise ValueError( + f"Expected {len(output_fields)} outputs, got " + f"{len(output_lazy_fields)} ({output_lazy_fields})" + ) + for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + # Automatically combine any uncombined state arrays into lists + if TypeParser.get_origin(outpt_lf.type) is StateArray: + outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] + setattr(outputs, outpt.name, outpt_lf) + else: + if unset_outputs := [ + a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING + ]: + raise ValueError( + f"Expected outputs {unset_outputs} to be set by the " + f"constructor of {wf!r}" + ) + finally: + cls._under_construction = None - @property - def graph_sorted(self): - """Get a sorted graph representation of the workflow.""" - return self.graph.sorted_nodes + cls._constructed[hash_key] = wf - @property - def checksum(self): - """Calculates the unique checksum of the task. - Used to create specific directory name for task that are run; - and to create nodes checksums needed for graph checksums - (before the tasks have inputs etc.) - """ - # if checksum is called before run the _graph_checksums is not ready - if is_workflow(self) and self.definition._graph_checksums is attr.NOTHING: - self.definition._graph_checksums = { - nd.name: nd.checksum for nd in self.graph_sorted - } + return wf - input_hash = self.definition.hash - if not self.state: - self._checksum = create_checksum( - self.__class__.__name__, self._checksum_wf(input_hash) - ) - else: - self._checksum = create_checksum( - self.__class__.__name__, - self._checksum_wf(input_hash, with_splitter=True), - ) - return self._checksum + @classmethod + def clear_cache(cls): + """Clear the cache of constructed workflows""" + cls._constructed.clear() - def _checksum_wf(self, input_hash, with_splitter=False): - """creating hash value for workflows - includes connections and splitter if with_splitter is True - """ - connection_hash = hash_function(self._connections) - hash_list = [input_hash, connection_hash] - if with_splitter and self.state: - # including splitter in the hash - splitter_hash = hash_function(self.state.splitter) - hash_list.append(splitter_hash) - return hash_function(hash_list) - - def add(self, task): - """ - Add a task to the workflow. + def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: + """Add a node to the workflow Parameters ---------- - task : :class:`TaskBase` - The task to be added. 
+ task_spec : TaskDef + The definition of the task to add to the workflow as a node + name : str, optional + The name of the node, by default it will be the name of the task definition + class + Returns + ------- + OutputType + The outputs definition of the node """ - if task.name in dir(self): - raise ValueError( - "Cannot use names of workflow attributes or methods as task name" - ) - if task.name in self.name2obj: + if name is None: + name = type(task_spec).__name__ + if name in self._nodes: + raise ValueError(f"Node with name {name!r} already exists in the workflow") + node = Node[OutputsType](name=name, definition=task_spec, workflow=self) + self._nodes[name] = node + return node.lzout + + def __getitem__(self, key: str) -> Node: + return self._nodes[key] + + @property + def nodes(self) -> ty.Iterable[Node]: + return self._nodes.values() + + @property + def node_names(self) -> list[str]: + return list(self._nodes) + + @property + @classmethod + def under_construction(cls) -> "Workflow[ty.Any]": + if cls._under_construction is None: raise ValueError( - "Another task named {} is already added to the workflow".format( - task.name - ) + "pydra.design.workflow.this() can only be called from within a workflow " + "constructor function (see 'pydra.design.workflow.define')" ) - self.name2obj[task.name] = task + return cls._under_construction + + # Used to store the workflow that is currently being constructed + _under_construction: "Workflow[ty.Any]" = None + # Used to cache the constructed workflows by their hashed input values + _constructed: dict[int, "Workflow[ty.Any]"] = {} - if not is_task(task): - raise ValueError(f"Unknown workflow element: {task!r}") - self.graph.add_nodes(task) - self._last_added = task - logger.debug(f"Added {task}") - return self + def execution_graph(self, submitter: "Submitter") -> DiGraph: + return self._create_graph([NodeExecution(n, submitter) for n in self.nodes]) - def create_connections(self, task, detailed=False): + @property + def graph(self) -> DiGraph: + return self._create_graph(self.nodes, detailed=True) + + def _create_graph( + self, nodes: "list[Node | NodeExecution]", detailed: bool = False + ) -> DiGraph: """ - Add and connect a particular task to existing nodes in the workflow. + Connects a particular task to existing nodes in the workflow. Parameters ---------- - task : :class:`TaskBase` - The task to be added. - detailed : :obj:`bool` - If True, `add_edges_description` is run for self.graph to add - a detailed descriptions of the connections (input/output fields names) + detailed : bool + If True, `add_edges_description` is run a detailed descriptions of the + connections (input/output fields names) + node_klass : type, optional + The class to use for the nodes in the workflow. 
If provided the node is + wrapped by an instance of the class, if None the node is added as is, + by default None + + Returns + ------- + DiGraph + The graph of the workflow """ + graph: DiGraph = attrs.field(factory=DiGraph) + for node in nodes: + graph.add_nodes(node) # TODO: create connection is run twice - other_states = {} - for field in attrs_fields(task.inputs): - val = getattr(task.inputs, field.name) - if is_lazy(val): - # saving all connections with LazyFields - task.inp_lf[field.name] = val - # adding an edge to the graph if task id expecting output from a different task - if val.name != self.name: - # checking if the connection is already in the graph - if (getattr(self, val.name), task) not in self.graph.edges: - self.graph.add_edges((getattr(self, val.name), task)) - if detailed: - self.graph.add_edges_description( - (task.name, field.name, val.name, val.field) - ) - logger.debug("Connecting %s to %s", val.name, task.name) - # adding a state from the previous task to other_states - if ( - getattr(self, val.name).state - and getattr(self, val.name).state.splitter_rpn_final - ): - # variables that are part of inner splitters should be treated as a containers + for node in nodes: + other_states = {} + for field in attrs_fields(node.inputs): + lf = node._definition[field.name] + if isinstance(lf, LazyOutField): + # adding an edge to the graph if task id expecting output from a different task + if lf.name != self.name: + # checking if the connection is already in the graph + if (self[lf.name], node) not in graph.edges: + graph.add_edges((self[lf.name], node)) + if detailed: + graph.add_edges_description( + (node.name, field.name, lf.name, lf.field) + ) + logger.debug("Connecting %s to %s", lf.name, node.name) + # adding a state from the previous task to other_states if ( - task.state - and f"{task.name}.{field.name}" in task.state.splitter + self[lf.name].state + and self[lf.name].state.splitter_rpn_final ): - task._inner_cont_dim[f"{task.name}.{field.name}"] = 1 - # adding task_name: (task.state, [a field from the connection] - if val.name not in other_states: - other_states[val.name] = ( - getattr(self, val.name).state, - [field.name], + # variables that are part of inner splitters should be + # treated as a containers + if ( + node.state + and f"{node.name}.{field.name}" in node.state.splitter + ): + node._inner_cont_dim[f"{node.name}.{field.name}"] = 1 + # adding task_name: (task.state, [a field from the connection] + if lf.name not in other_states: + other_states[lf.name] = ( + self[lf.name].state, + [field.name], + ) + else: + # if the task already exist in other_state, + # additional field name should be added to the list of fields + other_states[lf.name][1].append(field.name) + else: # LazyField with the wf input + # connections with wf input should be added to the detailed graph description + if detailed: + graph.add_edges_description( + (node.name, field.name, lf.name, lf.field) ) - else: - # if the task already exist in other_state, - # additional field name should be added to the list of fields - other_states[val.name][1].append(field.name) - else: # LazyField with the wf input - # connections with wf input should be added to the detailed graph description - if detailed: - self.graph.add_edges_description( - (task.name, field.name, val.name, val.field) - ) - - # if task has connections state has to be recalculated - if other_states: - if hasattr(task, "fut_combiner"): - combiner = task.fut_combiner - else: - combiner = None - - if task.state: - 
task.state.update_connections( - new_other_states=other_states, new_combiner=combiner - ) - else: - task.state = state.State( - task.name, - splitter=None, - other_states=other_states, - combiner=combiner, - ) - - async def _run(self, submitter=None, rerun=False, **kwargs): - # output_spec needs to be set using set_output or at workflow initialization - if self.output_spec is None: - raise ValueError( - "Workflow output cannot be None, use set_output to define output(s)" - ) - # creating connections that were defined after adding tasks to the wf - self._connect_and_propagate_to_tasks( - propagate_rerun=self.task_rerun and self.propagate_rerun - ) - checksum = self.checksum - output_dir = self.output_dir - lockfile = self.cache_dir / (checksum + ".lock") - self.hooks.pre_run(self) - logger.debug( - "'%s' is attempting to acquire lock on %s with Pydra lock", - self.name, - lockfile, - ) - async with PydraFileLock(lockfile): - if not (rerun or self.task_rerun): - result = self.result() - if result is not None and not result.errored: - return result - cwd = os.getcwd() - self._populate_filesystem(checksum, output_dir) - result = Result(output=None, runtime=None, errored=False) - self.hooks.pre_run_task(self) - self.audit.start_audit(odir=output_dir) - try: - self.audit.monitor() - await self._run_task(submitter, rerun=rerun) - result.output = self._collect_outputs() - except Exception: - etype, eval, etr = sys.exc_info() - traceback = format_exception(etype, eval, etr) - record_error(output_dir, error=traceback) - result.errored = True - self._errored = True - raise - finally: - self.hooks.post_run_task(self, result) - self.audit.finalize_audit(result=result) - save(output_dir, result=result, task=self) - # removing the additional file with the checksum - (self.cache_dir / f"{self.uid}_info.json").unlink() - os.chdir(cwd) - self.hooks.post_run(self, result) - # Check for any changes to the input hashes that have occurred during the execution - # of the task - self._check_for_hash_changes() - return result + # if task has connections state has to be recalculated + if other_states: + if hasattr(node, "fut_combiner"): + combiner = node.fut_combiner + else: + combiner = None - async def _run_task(self, submitter, rerun=False, environment=None): - if not submitter: - raise Exception("Submitter should already be set.") - for nd in self.graph.nodes: - if nd.allow_cache_override: - nd.cache_dir = self.cache_dir - # at this point Workflow is stateless so this should be fine - await submitter.expand_workflow(self, rerun=rerun) - - # def set_output( - # self, - # connections: ty.Union[ - # ty.Tuple[str, LazyField], ty.List[ty.Tuple[str, LazyField]] - # ], - # ): - # """ - # Set outputs of the workflow by linking them with lazy outputs of tasks - - # Parameters - # ---------- - # connections : tuple[str, LazyField] or list[tuple[str, LazyField]] or None - # single or list of tuples linking the name of the output to a lazy output - # of a task in the workflow. 
- # """ - # from pydra.utils.typing import TypeParser - - # if self._connections is None: - # self._connections = [] - # if isinstance(connections, tuple) and len(connections) == 2: - # new_connections = [connections] - # elif isinstance(connections, list) and all( - # [len(el) == 2 for el in connections] - # ): - # new_connections = connections - # elif isinstance(connections, dict): - # new_connections = list(connections.items()) - # else: - # raise TypeError( - # "Connections can be a 2-elements tuple, a list of these tuples, or dictionary" - # ) - # # checking if a new output name is already in the connections - # connection_names = [name for name, _ in self._connections] - # if self.output_spec: - # output_types = {a.name: a.type for a in attr.fields(self.interface.Outputs)} - # else: - # output_types = {} - # # Check for type matches with explicitly defined outputs - # conflicting = [] - # type_mismatches = [] - # for conn_name, lazy_field in new_connections: - # if conn_name in connection_names: - # conflicting.append(conn_name) - # try: - # output_type = output_types[conn_name] - # except KeyError: - # pass - # else: - # if not TypeParser.matches_type(lazy_field.type, output_type): - # type_mismatches.append((conn_name, output_type, lazy_field.type)) - # if conflicting: - # raise ValueError(f"the output names {conflicting} are already set") - # if type_mismatches: - # raise TypeError( - # f"the types of the following outputs of {self} don't match their declared types: " - # + ", ".join( - # f"{n} (expected: {ex}, provided: {p})" - # for n, ex, p in type_mismatches - # ) - # ) - # self._connections += new_connections - # fields = [] - # for con in self._connections: - # wf_out_nm, lf = con - # task_nm, task_out_nm = lf.name, lf.field - # if task_out_nm == "all_": - # help = f"all outputs from {task_nm}" - # fields.append((wf_out_nm, dict, {"help": help})) - # else: - # from pydra.utils.typing import TypeParser - - # # getting information about the output field from the task output_spec - # # providing proper type and some help string - # task_output_spec = getattr(self, task_nm).output_spec - # out_fld = attr.fields_dict(task_output_spec)[task_out_nm] - # help = ( - # f"{out_fld.metadata.get('help', '')} (from {task_nm})" - # ) - # if TypeParser.get_origin(lf.type) is StateArray: - # type_ = TypeParser.get_item_type(lf.type) - # else: - # type_ = lf.type - # fields.append((wf_out_nm, type_, {"help": help})) - # self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseDef,)) - # logger.info("Added %s to %s", self.output_spec, self) - - def _collect_outputs(self): - output_klass = self.definition.Outputs - output = output_klass( - **{f.name: attr.NOTHING for f in attr.fields(output_klass)} - ) - # collecting outputs from tasks - output_wf = {} - for name, val in self._connections: - if not is_lazy(val): - raise ValueError("all connections must be lazy") - try: - val_out = val.get_value(self) - output_wf[name] = val_out - except (ValueError, AttributeError) as e: - output_wf[name] = None - # checking if the tasks has predecessors that raises error - if isinstance(getattr(self, val.name)._errored, list): - raise ValueError( - f"Tasks {getattr(self, val.name)._errored} raised an error" + if node.state: + node.state.update_connections( + new_other_states=other_states, new_combiner=combiner ) else: - if isinstance(getattr(self, val.name).output_dir, list): - err_file = [ - el / "_error.pklz" - for el in getattr(self, val.name).output_dir - ] - if not all(e.exists() for e in 
err_file): - raise e - else: - err_file = getattr(self, val.name).output_dir / "_error.pklz" - if not Path(err_file).exists(): - raise e - raise ValueError( - f"Task {val.name} raised an error, full crash report is here: " - f"{err_file}" + node.state = state.State( + node.name, + splitter=None, + other_states=other_states, + combiner=combiner, ) - return attr.evolve(output, **output_wf) def create_dotfile(self, type="simple", export=None, name=None, output_dir=None): """creating a graph - dotfile and optionally exporting to other formats""" outdir = output_dir if output_dir is not None else self.cache_dir + graph = self.graph if not name: name = f"graph_{self.name}" if type == "simple": - for task in self.graph.nodes: + for task in graph.nodes: self.create_connections(task) - dotfile = self.graph.create_dotfile_simple(outdir=outdir, name=name) + dotfile = graph.create_dotfile_simple(outdir=outdir, name=name) elif type == "nested": - for task in self.graph.nodes: + for task in graph.nodes: self.create_connections(task) - dotfile = self.graph.create_dotfile_nested(outdir=outdir, name=name) + dotfile = graph.create_dotfile_nested(outdir=outdir, name=name) elif type == "detailed": # create connections with detailed=True - for task in self.graph.nodes: + for task in graph.nodes: self.create_connections(task, detailed=True) # adding wf outputs for wf_out, lf in self._connections: - self.graph.add_edges_description((self.name, wf_out, lf.name, lf.field)) - dotfile = self.graph.create_dotfile_detailed(outdir=outdir, name=name) + graph.add_edges_description((self.name, wf_out, lf.name, lf.field)) + dotfile = graph.create_dotfile_detailed(outdir=outdir, name=name) else: raise Exception( f"type of the graph can be simple, detailed or nested, " @@ -1113,35 +849,9 @@ def create_dotfile(self, type="simple", export=None, name=None, output_dir=None) export = [export] formatted_dot = [] for ext in export: - formatted_dot.append(self.graph.export_graph(dotfile=dotfile, ext=ext)) + formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext)) return dotfile, formatted_dot - def _connect_and_propagate_to_tasks( - self, - *, - propagate_rerun=False, - override_task_caches=False, - ): - """ - Visit each node in the graph and create the connections. - Additionally checks if all tasks should be rerun. - """ - for task in self.graph.nodes: - self.create_connections(task) - # if workflow has task_rerun=True and propagate_rerun=True, - # it should be passed to the tasks - if propagate_rerun: - task.task_rerun = True - # if the task is a wf, than the propagate_rerun should be also set - if is_workflow(task): - task.propagate_rerun = True - - # ported from Submitter.__call__ - # TODO: no prepare state ? 
- if override_task_caches and task.allow_cache_override: - task.cache_dir = self.cache_dir - task.cache_locations = task._cache_locations + self.cache_locations - def is_task(obj): """Check whether an object looks like a task.""" @@ -1150,7 +860,9 @@ def is_task(obj): def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" - return isinstance(obj, WorkflowTask) + from pydra.engine.specs import WorkflowDef + + return isinstance(obj, WorkflowDef) def has_lazy(obj): diff --git a/pydra/engine/graph.py b/pydra/engine/graph.py index bfa62e0764..25b8ef6a74 100644 --- a/pydra/engine/graph.py +++ b/pydra/engine/graph.py @@ -309,6 +309,12 @@ def _checking_successors_nodes(self, node, remove=True): else: return True + def successors_nodes(self, node): + """Get all the nodes that follow the node""" + self._successors_all = [] + self._checking_successors_nodes(node=node, remove=False) + return set(self._successors_all) + def remove_successors_nodes(self, node): """Removing all the nodes that follow the node""" self._successors_all = [] diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 3f2de60486..2a6e7ec0d2 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -34,7 +34,11 @@ def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]: def attrs_values(obj, **kwargs) -> dict[str, ty.Any]: """Get the values of an attrs object.""" - return attrs.asdict(obj, recurse=False, **kwargs) + return { + n: v + for n, v in attrs.asdict(obj, recurse=False, **kwargs).items() + if not n.startswith("_") + } def list_fields(definition: "type[TaskDef] | TaskDef") -> list["Field"]: @@ -592,6 +596,6 @@ def ensure_list(obj, tuple2list=False): def is_lazy(obj): """Check whether an object is a lazy field or has any attribute that is a Lazy Field""" - from pydra.engine.workflow.lazy import LazyField + from pydra.engine.lazy import LazyField return isinstance(obj, LazyField) diff --git a/pydra/engine/workflow/lazy.py b/pydra/engine/lazy.py similarity index 98% rename from pydra/engine/workflow/lazy.py rename to pydra/engine/lazy.py index 76f5a07178..6c8ddce71f 100644 --- a/pydra/engine/workflow/lazy.py +++ b/pydra/engine/lazy.py @@ -120,6 +120,10 @@ class LazyOutField(LazyField[T]): node: node.Node attr_type = "output" + @property + def name(self) -> str: + return self.node.name + def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: """Return the value of a lazy field. diff --git a/pydra/engine/workflow/node.py b/pydra/engine/node.py similarity index 53% rename from pydra/engine/workflow/node.py rename to pydra/engine/node.py index 7010aa27b3..98c168c394 100644 --- a/pydra/engine/workflow/node.py +++ b/pydra/engine/node.py @@ -1,22 +1,25 @@ import typing as ty from copy import deepcopy, copy from enum import Enum -from pathlib import Path import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy -from ..specs import TaskDef, TaskOutputs, WorkflowDef -from ..task import Task -from ..helpers import ensure_list, attrs_values, is_lazy, load_result, create_checksum +from pydra.engine.helpers import ( + ensure_list, + attrs_values, + is_lazy, + create_checksum, +) from pydra.utils.hash import hash_function -from .. 
import helpers_state as hlpst -from ..state import State +from pydra.engine import helpers_state as hlpst +from pydra.engine.state import State, StateIndex if ty.TYPE_CHECKING: - from .base import Workflow + from .core import Workflow + from pydra.engine.specs import TaskDef, TaskOutputs -OutputType = ty.TypeVar("OutputType", bound=TaskOutputs) +OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") Splitter = ty.Union[str, ty.Tuple[str, ...]] _not_set = Enum("_not_set", "NOT_SET") @@ -37,7 +40,7 @@ class Node(ty.Generic[OutputType]): """ name: str - _spec: TaskDef[OutputType] + _definition: "TaskDef[OutputType]" _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False) _lzout: OutputType | None = attrs.field( init=False, default=None, eq=False, hash=False @@ -50,6 +53,28 @@ class Node(ty.Generic[OutputType]): init=False, factory=dict ) # QUESTION: should this be included in the state? + def __attrs_post_init__(self): + # Add node name to state's splitter, combiner and cont_dim loaded from the def + splitter = self._definition._splitter + combiner = self._definition._combiner + if splitter: + splitter = hlpst.add_name_splitter(splitter, self.name) + if combiner: + combiner = hlpst.add_name_combiner(combiner, self.name) + if self._definition._cont_dim: + self._cont_dim = {} + for key, val in self._definition._cont_dim.items(): + self._cont_dim[f"{self.name}.{key}"] = val + self._set_state(splitter=splitter, combiner=combiner) + if combiner: + if not_split := [ + c for c in combiner if not any(c in s for s in self.state.splitter_rpn) + ]: + raise ValueError( + f"Combiner fields {not_split} for Node {self.name!r} are not in the " + f"splitter fields {self.state.splitter_rpn}" + ) + class Inputs: """A class to wrap the inputs of a node and control access to them so lazy fields that will change the downstream state (i.e. with new splits) aren't set after @@ -62,10 +87,10 @@ def __init__(self, node: "Node") -> None: super().__setattr__("_node", node) def __getattr__(self, name: str) -> ty.Any: - return getattr(self._node._spec, name) + return getattr(self._node._definition, name) def __setattr__(self, name: str, value: ty.Any) -> None: - setattr(self._node._spec, name, value) + setattr(self._node._definition, name, value) if is_lazy(value): upstream_states = self._node._get_upstream_states() if ( @@ -81,6 +106,10 @@ def __setattr__(self, name: str, value: ty.Any) -> None: def inputs(self) -> Inputs: return self.Inputs(self) + @property + def input_names(self) -> list[str]: + return list(attrs_values(self._definition).keys()) + @property def state(self): """Initialise the state of the node just after it has been created (i.e. before @@ -93,7 +122,7 @@ def state(self): @property def input_values(self) -> tuple[tuple[str, ty.Any]]: - return tuple(attrs_values(self._spec).items()) + return tuple(attrs_values(self._definition).items()) @property def lzout(self) -> OutputType: @@ -119,136 +148,6 @@ def lzout(self) -> OutputType: self._wrap_lzout_types_in_state_arrays() return outputs - def split( - self, - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, - /, - overwrite: bool = False, - cont_dim: ty.Optional[dict] = None, - **inputs, - ): - """ - Run this task parametrically over lists of split inputs. - - Parameters - ---------- - splitter : str or list[str] or tuple[str] or None - the fields which to split over. If splitting over multiple fields, lists of - fields are interpreted as outer-products and tuples inner-products. 
If None, - then the fields to split are taken from the keyword-arg names. - overwrite : bool, optional - whether to overwrite an existing split on the node, by default False - cont_dim : dict, optional - Container dimensions for specific inputs, used in the splitter. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. - **inputs - fields to split over, will automatically be wrapped in a StateArray object - and passed to the node inputs - - Returns - ------- - self : TaskDef - a reference to the task - """ - self._check_if_outputs_have_been_used("the node cannot be split or combined") - if splitter is None and inputs: - splitter = list(inputs) - elif splitter: - missing = set(hlpst.unwrap_splitter(splitter)) - set(inputs) - missing = [m for m in missing if not m.startswith("_")] - if missing: - raise ValueError( - f"Split is missing values for the following fields {list(missing)}" - ) - splitter = hlpst.add_name_splitter(splitter, self.name) - # if user want to update the splitter, overwrite has to be True - if self._state and not overwrite and self._state.splitter != splitter: - raise Exception( - "splitter has been already set, " - "if you want to overwrite it - use overwrite=True" - ) - if cont_dim: - for key, vel in cont_dim.items(): - self._cont_dim[f"{self.name}.{key}"] = vel - if inputs: - new_inputs = {} - split_inputs = set( - f"{self.name}.{n}" if "." not in n else n - for n in hlpst.unwrap_splitter(splitter) - if not n.startswith("_") - ) - for inpt_name, inpt_val in inputs.items(): - new_val: ty.Any - if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore - if isinstance(inpt_val, lazy.LazyField): - new_val = inpt_val.split(splitter) - elif isinstance(inpt_val, ty.Iterable) and not isinstance( - inpt_val, (ty.Mapping, str) - ): - new_val = StateArray(inpt_val) - else: - raise TypeError( - f"Could not split {inpt_val} as it is not a sequence type" - ) - else: - new_val = inpt_val - new_inputs[inpt_name] = new_val - # Update the inputs with the new split values - self._spec = attrs.evolve(self._spec, **new_inputs) - self._set_state(splitter=splitter) - # Wrap types of lazy outputs in StateArray types - self._wrap_lzout_types_in_state_arrays() - return self - - def combine( - self, - combiner: ty.Union[ty.List[str], str], - overwrite: bool = False, # **kwargs - ): - """ - Combine inputs parameterized by one or more previous tasks. - - Parameters - ---------- - combiner : list[str] or str - the field or list of inputs to be combined (i.e. 
not left split) after the - task has been run - overwrite : bool - whether to overwrite an existing combiner on the node - **kwargs : dict[str, Any] - values for the task that will be "combined" before they are provided to the - node - - Returns - ------- - self : TaskDef - a reference to the task - """ - if not isinstance(combiner, (str, list)): - raise Exception("combiner has to be a string or a list") - combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) - if not_split := [ - c for c in combiner if not any(c in s for s in self.state.splitter_rpn) - ]: - raise ValueError( - f"Combiner fields {not_split} for Node {self.name!r} are not in the " - f"splitter fields {self.splitter}" - ) - if ( - self._state - and self._state.combiner - and combiner != self._state.combiner - and not overwrite - ): - raise Exception( - "combiner has been already set, " - "if you want to overwrite it - use overwrite=True" - ) - self._set_state(combiner=combiner) - self._wrap_lzout_types_in_state_arrays() - return self - @property def cont_dim(self): # adding inner_cont_dim to the general container_dimension provided by the users @@ -276,47 +175,6 @@ def combiner(self): return () return self._state.combiner - def _get_tasks( - self, - cache_locations: Path | list[Path], - state_index: int | None = None, - return_inputs: bool = False, - ) -> list["Task"]: - raise NotImplementedError - if self.state: - if state_index is None: - # if state_index=None, collecting all results - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs) - else: - results = [] - for ind in range(len(self.state.inputs_ind)): - checksum = self.checksum_states(state_index=ind) - result = load_result(checksum, cache_locations) - if result is None: - return None - results.append(result) - if return_inputs is True or return_inputs == "val": - return list(zip(self.state.states_val, results)) - elif return_inputs == "ind": - return list(zip(self.state.states_ind, results)) - else: - return results - else: # state_index is not None - if self.state.combiner: - return self._combined_output(return_inputs=return_inputs)[ - state_index - ] - result = load_result(self.checksum_states(state_index), cache_locations) - if return_inputs is True or return_inputs == "val": - return (self.state.states_val[state_index], result) - elif return_inputs == "ind": - return (self.state.states_ind[state_index], result) - else: - return result - else: - return load_result(self._spec._checksum, cache_locations) - def _checksum_states(self, state_index=None): """ Calculate a checksum for the specific state or all of the states of the task. 
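A rough sketch of how the node machinery above is used while a workflow is being constructed: `Workflow.add` returns the node's `lzout` outputs object, and passing its lazy fields into other definitions is what creates the graph edges. Here `wf` stands for the workflow instance under construction, and `FirstDef`/`SecondDef` (with an `out` output field) are hypothetical task definitions used only for illustration; none of these names are defined in this patch.

    # sketch only -- hypothetical wiring inside a workflow constructor function
    first = wf.add(FirstDef(x=1), name="first")             # returns the node's lzout outputs object
    second = wf.add(SecondDef(y=first.out), name="second")  # the lazy field becomes a graph edge
    return second.out                                       # lazy fields returned by the constructor become workflow outputs
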
@@ -329,23 +187,24 @@ def _checksum_states(self, state_index=None): TODO """ - # if is_workflow(self) and self.definition._graph_checksums is attr.NOTHING: - # self.definition._graph_checksums = { + # if is_workflow(self) and self._definition._graph_checksums is attr.NOTHING: + # self._definition._graph_checksums = { # nd.name: nd.checksum for nd in self.graph_sorted # } + from pydra.engine.specs import WorkflowDef if state_index is not None: - inputs_copy = copy(self.definition) + inputs_copy = copy(self._definition) for key, ind in self.state.inputs_ind[state_index].items(): val = self._extract_input_el( - inputs=self.definition, inp_nm=key.split(".")[1], ind=ind + inputs=self._definition, inp_nm=key.split(".")[1], ind=ind ) setattr(inputs_copy, key.split(".")[1], val) # setting files_hash again in case it was cleaned by setting specific element # that might be important for outer splitter of input variable with big files # the file can be changed with every single index even if there are only two files input_hash = inputs_copy.hash - if isinstance(self._spec, WorkflowDef): + if isinstance(self._definition, WorkflowDef): con_hash = hash_function(self._connections) # TODO: hash list is not used hash_list = [input_hash, con_hash] # noqa: F841 @@ -358,7 +217,7 @@ def _checksum_states(self, state_index=None): else: checksum_list = [] if not hasattr(self.state, "inputs_ind"): - self.state.prepare_states(self.definition, cont_dim=self.cont_dim) + self.state.prepare_states(self._definition, cont_dim=self.cont_dim) self.state.prepare_inputs() for ind in range(len(self.state.inputs_ind)): checksum_list.append(self._checksum_states(state_index=ind)) @@ -432,3 +291,47 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: # additional field name should be added to the list of fields upstream_states[node.name][1].append(inpt_name) return upstream_states + + def _extract_input_el(self, inputs, inp_nm, ind): + """ + Extracting element of the inputs taking into account + container dimension of the specific element that can be set in self.cont_dim. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. 
+ If + """ + if f"{self.name}.{inp_nm}" in self.cont_dim: + return list( + hlpst.flatten( + ensure_list(getattr(inputs, inp_nm)), + max_depth=self.cont_dim[f"{self.name}.{inp_nm}"], + ) + )[ind] + else: + return getattr(inputs, inp_nm)[ind] + + def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: + """Split the definition into the different states it will be run over""" + # TODO: doesn't work properly for more cmplicated wf (check if still an issue) + if not self.state: + return {None: self._definition} + split_defs = {} + for input_ind in self.state.inputs_ind: + inputs_dict = {} + for inp in set(self.input_names): + if f"{self.name}.{inp}" in input_ind: + inputs_dict[inp] = self._extract_input_el( + inputs=self._definition, + inp_nm=inp, + ind=input_ind[f"{self.name}.{inp}"], + ) + split_defs[StateIndex(input_ind)] = attrs.evolve( + self._definition, inputs_dict + ) + return split_defs + + # else: + # # todo it never gets here + # breakpoint() + # inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} + # return None, inputs_dict diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 9d9f273202..dc19aca68f 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -7,13 +7,13 @@ import itertools import platform import shlex +from collections import Counter import typing as ty from glob import glob from copy import deepcopy from typing_extensions import Self import attrs import cloudpickle as cp -from fileformats.generic import File from pydra.engine.audit import AuditFlag from pydra.utils.typing import TypeParser from .helpers import ( @@ -26,13 +26,18 @@ parse_format_string, ) from .helpers_file import template_update +from . import helpers_state as hlpst +from . import lazy from pydra.utils.hash import hash_function, Cache +from pydra.utils.typing import StateArray from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY from pydra.design import shell if ty.TYPE_CHECKING: from pydra.engine.core import Task from pydra.engine.task import ShellTask + from pydra.engine.core import Workflow + from pydra.engine.submitter import Submitter def is_set(value: ty.Any) -> bool: @@ -43,7 +48,7 @@ def is_set(value: ty.Any) -> bool: class TaskOutputs: """Base class for all output definitions""" - RESERVED_FIELD_NAMES = ("inputs", "split", "combine") + RESERVED_FIELD_NAMES = ("inputs",) @classmethod def from_task(cls, task: "Task") -> Self: @@ -72,6 +77,53 @@ def inputs(self): """The inputs object associated with a lazy-outputs object""" return self._get_node().inputs + def _get_node(self): + try: + return self._node + except AttributeError: + raise AttributeError( + f"{self} outputs object is not a lazy output of a workflow node" + ) from None + + def __iter__(self) -> ty.Generator[str, None, None]: + """Iterate through all the names in the definition""" + return (f.name for f in list_fields(self)) + + def __getitem__(self, name: str) -> ty.Any: + """Return the value for the given attribute, resolving any templates + + Parameters + ---------- + name : str + the name of the attribute to return + + Returns + ------- + Any + the value of the attribute + """ + try: + return getattr(self, name) + except AttributeError: + raise KeyError(f"{self} doesn't have an attribute {name}") from None + + +OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) + + +@attrs.define(kw_only=True, auto_attribs=False) +class TaskDef(ty.Generic[OutputsType]): + """Base class for all task definitions""" + + Task: "ty.Type[core.Task]" + + # The following 
fields are used to store split/combine state information + _splitter = attrs.field(default=None, init=False) + _combiner = attrs.field(default=None, init=False) + _cont_dim = attrs.field(default=None, init=False) + + RESERVED_FIELD_NAMES = ("split", "combine") + def split( self, splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, @@ -104,15 +156,56 @@ def split( self : TaskBase a reference to the task """ - self._get_node().split( - splitter, overwrite=overwrite, cont_dim=cont_dim, **inputs - ) + if self._splitter and not overwrite: + raise ValueError( + f"Cannot overwrite existing splitter {self._splitter} on {self}, " + "set 'overwrite=True' to do so" + ) + if splitter: + unwraped_split = hlpst.unwrap_splitter(splitter) + if duplicated := [f for f, c in Counter(unwraped_split).items() if c > 1]: + raise ValueError(f"Splitter fields {duplicated} are duplicated") + split_names = set( + s for s in unwraped_split if not s.startswith("_") and "." not in s + ) + input_names = set(inputs) + if missing_inputs := list(split_names - input_names): + raise ValueError( + f"Splitter fields {missing_inputs} need to be provided as a keyword " + f"arguments to the split method (provided {list(inputs)})" + ) + if unrecognised_inputs := list(input_names - split_names): + raise ValueError( + f"Provided inputs {unrecognised_inputs} are not present in the " + f"splitter {splitter}" + ) + else: + # If no splitter is provided, use the names of the inputs as combinatorial splitter + split_names = splitter = list(inputs) + for field_name in cont_dim or []: + if field_name not in split_names: + raise ValueError( + f"Container dimension for {field_name} is provided but the field " + f"is not present in the inputs" + ) + self._splitter = splitter + self._cont_dim = cont_dim + for name, value in inputs.items(): + if isinstance(value, lazy.LazyField): + split_val = value.split(splitter) + elif isinstance(value, ty.Iterable) and not isinstance( + value, (ty.Mapping, str) + ): + split_val = StateArray(value) + else: + raise TypeError(f"Could not split {value} as it is not a sequence type") + setattr(self, name, split_val) return self def combine( self, combiner: ty.Union[ty.List[str], str], - overwrite: bool = False, # **kwargs + overwrite: bool = False, ) -> Self: """ Combine inputs parameterized by one or more previous tasks. 
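A minimal sketch of the reworked definition-level `split` above. `add_def` is a hypothetical task definition with inputs `a` and `b`; it is not defined anywhere in this patch, and the values are illustrative only.

    # implicit splitter: the fields to split over are taken from the keyword names
    split_def = add_def.split(a=[1, 2, 3])
    # explicit splitter: a list of fields denotes an outer product, a tuple a zipped (inner) product
    split_def = add_def.split(["a", "b"], a=[1, 2], b=[10, 20])
    # each listed value is wrapped in a StateArray and the splitter/cont_dim are stored on the definition
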
@@ -133,49 +226,20 @@ def combine( self : Self a reference to the outputs object """ - self._get_node().combine(combiner, overwrite=overwrite) - return self - - def _get_node(self): - try: - return self._node - except AttributeError: - raise AttributeError( - f"{self} outputs object is not a lazy output of a workflow node" + if self._combiner and not overwrite: + raise ValueError( + f"Attempting to overwrite existing combiner {self._combiner} on {self}, " + "set 'overwrite=True' to do so" ) - - def __iter__(self) -> ty.Generator[str, None, None]: - """Iterate through all the names in the definition""" - return (f.name for f in list_fields(self)) - - def __getitem__(self, name: str) -> ty.Any: - """Return the value for the given attribute, resolving any templates - - Parameters - ---------- - name : str - the name of the attribute to return - - Returns - ------- - Any - the value of the attribute - """ - try: - return getattr(self, name) - except AttributeError: - raise KeyError(f"{self} doesn't have an attribute {name}") from None - - -OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) - - -class TaskDef(ty.Generic[OutputsType]): - """Base class for all task definitions""" - - Task: "ty.Type[core.Task]" - - RESERVED_FIELD_NAMES = () + if isinstance(combiner, str): + combiner = [combiner] + local_names = set(c for c in combiner if "." not in c and not c.startswith("_")) + if unrecognised := local_names - set(self): + raise ValueError( + f"Combiner fields {unrecognised} are not present in the task definition" + ) + self._combiner = combiner + return self def __call__( self, @@ -183,12 +247,10 @@ def __call__( audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cache_locations=None, - inputs: ty.Text | File | dict[str, ty.Any] | None = None, - cont_dim=None, - messenger_args=None, messengers=None, + messenger_args=None, rerun=False, - **kwargs, + **exec_kwargs, ) -> OutputsType: """Create a task from this definition and execute it to produce a result. 
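Continuing the sketch with the same hypothetical `add_def`, the reworked `combine` and `__call__` are expected to be used roughly as follows (the cache directory path is illustrative):

    # collapse the previously split 'a' dimension back into a list on the outputs
    combined = add_def.split(a=[1, 2, 3]).combine("a")
    # calling a split definition wraps it in an implicit workflow, executes it via a Submitter,
    # and returns the task's outputs (list-typed fields for the split/combined values)
    outputs = combined(cache_dir="/tmp/pydra-cache")
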
@@ -199,46 +261,71 @@ def __call__( audit_flags : AuditFlag, optional Auditing configuration, by default AuditFlag.NONE cache_dir : os.PathLike, optional - Cache directory, by default None + Cache directory where the working directory/results for the task will be + stored, by default None cache_locations : list[os.PathLike], optional - Cache locations, by default None - inputs : str or File or dict, optional - Inputs for the task, by default None - cont_dim : dict, optional - Container dimensions for specific inputs, by default None + Alternate cache locations to check for pre-computed results, by default None messenger_args : dict, optional Messenger arguments, by default None messengers : list, optional Messengers, by default None rerun : bool, optional - Whether to rerun the task, by default False - **kwargs - Additional keyword arguments to pass to the task + Whether to force the re-computation of the task results even if existing + results are found, by default False + exec_kwargs : dict + Keyword arguments to pass on to the Submitter object used to execute the task Returns ------- - Outputs - The output interface of the task + OutputsType or list[OutputsType] + The output interface of the task, or in the case of split tasks, a list of + output interfaces """ + from pydra.engine.submitter import Submitter + self._check_rules() - task = self.Task( - definition=self, - name=name, + if self._splitter: + # Create an implicit workflow to hold the split nodes + from pydra.design import workflow + + outputs = {o.name: list[o.type] for o in list_fields(self.Outputs)} + + @workflow.define(outputs=outputs) + def Split(): + node = workflow.add(self) + return tuple(getattr(node, o) for o in outputs) + + definition = Split() + + elif self._combiner: + raise ValueError( + f"Task {self} is marked for combining, but not splitting. " + "Use the `split` method to split the task before combining." + ) + else: + definition = self + + with Submitter( audit_flags=audit_flags, cache_dir=cache_dir, cache_locations=cache_locations, - inputs=inputs, - cont_dim=cont_dim, messenger_args=messenger_args, messengers=messengers, rerun=rerun, - ) - result = task(**kwargs) + **exec_kwargs, + ) as sub: + result = sub(definition) + if result.errored: + raise ValueError(f"Task {definition} failed with an error") return result.output def __iter__(self) -> ty.Generator[str, None, None]: """Iterate through all the names in the definition""" - return (f.name for f in list_fields(self)) + return ( + f.name + for f in list_fields(self) + if not (f.name.startswith("_") or f.name in self.RESERVED_FIELD_NAMES) + ) def __getitem__(self, name: str) -> ty.Any: """Return the value for the given attribute, resolving any templates @@ -430,7 +517,7 @@ def get_output_field(self, field_name): @attrs.define(kw_only=True) -class RuntimeDef: +class RuntimeSpec: """ Specification for a task. @@ -466,14 +553,71 @@ class PythonDef(TaskDef[PythonOutputsType]): class WorkflowOutputs(TaskOutputs): - pass + + @classmethod + def from_task(cls, task: "Task") -> Self: + """Collect the outputs of a workflow task from the outputs of the nodes in the + + Parameters + ---------- + task : Task + The task whose outputs are being collected. 
+ + Returns + ------- + outputs : Outputs + The outputs of the task + """ + outputs = super().from_task(task) + wf = task.definition.construct() + # collecting outputs from tasks + output_wf = {} + for name, lazy_field in wf.outputs.items(): + try: + val_out = lazy_field.get_value(wf) + output_wf[name] = val_out + except (ValueError, AttributeError) as e: + output_wf[name] = None + node = wf[lazy_field.name] + # checking if the tasks has predecessors that raises error + if isinstance(node._errored, list): + raise ValueError(f"Tasks {node._errored} raised an error") + else: + err_files = [(t.output_dir / "_error.pklz") for t in node.tasks] + if not all(err_files): + raise e + raise ValueError( + f"Task {lazy_field.name} raised an error, full crash report is " + f"here: " + + ( + str(err_files[0]) + if len(err_files) == 1 + else "\n" + "\n".join(str(f) for f in err_files) + ) + ) + return attrs.evolve(outputs, **output_wf) WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) +@attrs.define(kw_only=True) class WorkflowDef(TaskDef[WorkflowOutputsType]): - pass + + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("construct",) + + _constructed = attrs.field(default=None, init=False) + + def construct(self) -> "Workflow": + from pydra.engine.core import Workflow + + if self._constructed is not None: + return self._constructed + self._constructed = Workflow.construct(self) + return self._constructed + + async def _run(self, task: "Task", submitter: "Submitter") -> Result: + await submitter.expand_workflow(task) RETURN_CODE_HELP = """The process' exit code.""" @@ -592,7 +736,7 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: class ShellDef(TaskDef[ShellOutputsType]): - RESERVED_FIELD_NAMES = ("cmdline",) + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) @property def cmdline(self) -> str: diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 58a1a68b0a..84e131b57b 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -2,7 +2,9 @@ from copy import deepcopy import itertools +from collections import OrderedDict from functools import reduce +import typing as ty import attrs from . import helpers_state as hlpst from .helpers import ensure_list, attrs_values @@ -13,6 +15,38 @@ op = {".": zip, "*": itertools.product} +OutputsType = ty.TypeVar("OutputsType") + + +class StateIndex: + """The collection of state indices that identifies a single element within the list + of tasks generated from a node + + Parameters + ---------- + indices : dict[str, int] + a dictionary of indices for each input field + """ + + def __init__(self, indices: dict[str, int] | None = None): + # We used ordered dict here to ensure the keys are always in the same order + # while OrderedDict is not strictly necessary for CPython 3.7+, we use it to + # signal that the order of the keys is important + if indices is None: + self.indices = OrderedDict() + else: + self.indices = OrderedDict(sorted(indices)) + + def __hash__(self): + return hash(tuple(self.indices.items())) + + def __eq__(self, other): + return self.indices == other.indices + + def __str__(self): + return "__".join(f"{n}-{i}" for n, i in self.indices.items()) + + class State: """ A class that specifies a State of all tasks. 
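A small sketch of how the new `StateIndex` is intended to be used, assuming the constructor orders the mapping by key (i.e. effectively `sorted(indices.items())`); the field names are made up for the example.

    idx = StateIndex({"mynode.a": 0, "mynode.b": 1})
    str(idx)    # expected to render as "mynode.a-0__mynode.b-1"
    hash(idx)   # hashable and comparable, so it can key per-state task dictionaries
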
@@ -1116,3 +1150,20 @@ def _single_op_splits(self, op_single): val = op["*"](val_ind) keys = [op_single] return val, keys + + # def split(self, task_def: TaskDef[OutputsType]) -> list["TaskDef[OutputsType]"]: + # """ + # Split the task definition containing state-array fields into multiple tasks + # without splitters and non-state-array values. + + # Parameters + # ---------- + # task_def: TaskDef + # a task definition + + # Returns + # ------- + # List[TaskDef] + # a list of task definitions + # """ + # return hlpst.map_splits(self.states_ind, task_def, cont_dim=task_def._cont_dim) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 6f5a7d0290..74a3726b47 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -3,22 +3,49 @@ import asyncio import typing as ty import pickle -from uuid import uuid4 +import os +from pathlib import Path +from tempfile import mkdtemp +from copy import copy +from collections import defaultdict from .workers import Worker, WORKERS from .core import is_workflow +from .graph import DiGraph from .helpers import get_open_loop, load_and_run_async from pydra.utils.hash import PersistentCache +from .state import StateIndex +from .audit import Audit +from .core import Task +from pydra.utils.messenger import AuditFlag, Messenger import logging logger = logging.getLogger("pydra.submitter") +if ty.TYPE_CHECKING: + from .node import Node + from .specs import TaskDef, WorkflowDef + from .environments import Environment + +# Used to flag development mode of Audit +develop = False + -# TODO: runnable in init or run class Submitter: """Send a task to the execution backend.""" - def __init__(self, plugin: ty.Union[str, ty.Type[Worker]] = "cf", **kwargs): + def __init__( + self, + worker: ty.Union[str, ty.Type[Worker]] = "cf", + cache_dir: os.PathLike | None = None, + cache_locations: list[os.PathLike] | None = None, + environment: "Environment | None" = None, + audit_flags: AuditFlag = AuditFlag.NONE, + messengers: ty.Iterable[Messenger] | None = None, + messenger_args: dict[str, ty.Any] | None = None, + rerun: bool = False, + **kwargs, + ): """ Initialize task submission. @@ -31,41 +58,46 @@ def __init__(self, plugin: ty.Union[str, ty.Type[Worker]] = "cf", **kwargs): Additional keyword arguments to pass to the worker. 
""" + + self.audit = Audit( + audit_flags=audit_flags, + messengers=messengers, + messenger_args=messenger_args, + develop=develop, + ) + self.cache_dir = cache_dir + self.cache_locations = cache_locations + self.environment = environment + self.rerun = rerun self.loop = get_open_loop() self._own_loop = not self.loop.is_running() - if isinstance(plugin, str): - self.plugin = plugin + if isinstance(worker, str): + self.plugin = worker try: worker_cls = WORKERS[self.plugin] except KeyError: raise NotImplementedError(f"No worker for '{self.plugin}' plugin") else: try: - self.plugin = plugin.plugin_name + self.plugin = worker.plugin_name except AttributeError: raise ValueError("Worker class must have a 'plugin_name' str attribute") - worker_cls = plugin + worker_cls = worker self.worker = worker_cls(**kwargs) self.worker.loop = self.loop - def __call__(self, runnable, cache_locations=None, rerun=False, environment=None): + def __call__( + self, + task_def: "TaskDef", + ): """Submitter run function.""" - from pydra.engine.core import TaskDef - - if cache_locations is not None: - runnable.cache_locations = cache_locations - if isinstance(runnable, TaskDef): - runnable = runnable.Task( - runnable, - cache_locations=cache_locations, - ) - self.loop.run_until_complete( - self.submit_from_call(runnable, rerun, environment) - ) + + task = Task(task_def, submitter=self, name="task") + self.loop.run_until_complete(self.submit_from_call(task)) PersistentCache().clean_up() - return runnable.result() + return task.result() - async def submit_from_call(self, runnable, rerun, environment): + async def submit_from_call(self, task: "Task"): """ This coroutine should only be called once per Submitter call, and serves as the bridge between sync/async lands. @@ -79,26 +111,25 @@ async def submit_from_call(self, runnable, rerun, environment): Once Python 3.10 is the minimum, this should probably be refactored into using structural pattern matching. """ - if is_workflow(runnable): # TODO: env to wf + if is_workflow(task): # TODO: env to wf # connect and calculate the checksum of the graph before running - runnable._connect_and_propagate_to_tasks(override_task_caches=True) + task._create_graph_connections() # override_task_caches=True) # 0 - if runnable.plugin and runnable.plugin != self.plugin: + if task.plugin and task.plugin != self.plugin: # if workflow has a different plugin it's treated as a single element - await self.worker.run_el(runnable, rerun=rerun) + await self.worker.run(task, rerun=self.rerun) # 1 - if runnable.state is None: - await runnable._run(self, rerun=rerun) - # 3 - else: - await self.expand_runnable(runnable, wait=True, rerun=rerun) - runnable._reset() + # if runnable.state is None: + # await runnable._run(self, rerun=rerun) + # # 3 + # else: + await self.expand_runnable(task, wait=True) else: # 2 - await self.expand_runnable(runnable, wait=True, rerun=rerun) # TODO + await self.expand_runnable(task, wait=True) # TODO return True - async def expand_runnable(self, runnable, wait=False, rerun=False): + async def expand_runnable(self, runnable: "Task", wait=False): """ This coroutine handles state expansion. @@ -132,10 +163,10 @@ async def expand_runnable(self, runnable, wait=False, rerun=False): # job has no state anymore futures.add( # This unpickles and runs workflow - why are we pickling? 
- asyncio.create_task(load_and_run_async(task_pkl, self, rerun)) + asyncio.create_task(load_and_run_async(task_pkl, self, self.rerun)) ) else: - futures.add(self.worker.run_el((task_pkl, runnable), rerun=rerun)) + futures.add(self.worker.run((task_pkl, runnable), rerun=self.rerun)) if wait and futures: # if wait is True, we are at the end of the graph / state expansion. @@ -145,40 +176,37 @@ async def expand_runnable(self, runnable, wait=False, rerun=False): # pass along futures to be awaited independently return futures - async def expand_workflow(self, wf, rerun=False): + async def expand_workflow(self, task: "Task[WorkflowDef]"): """ Expand and execute a stateless :class:`~pydra.engine.core.Workflow`. This method is only reached by `Workflow._run_task`. Parameters ---------- - wf : :obj:`~pydra.engine.core.Workflow` + task : :obj:`~pydra.engine.core.WorkflowTask` Workflow Task object Returns ------- - wf : :obj:`pydra.engine.core.Workflow` + wf : :obj:`pydra.engine.workflow.Workflow` The computed workflow """ - # creating a copy of the graph that will be modified - # the copy contains new lists with original runnable objects - graph_copy = wf.graph.copy() - # resetting uid for nodes in the copied workflows - for nd in graph_copy.nodes: - nd._uid = uuid4().hex + wf = task.definition.construct() + # Generate the execution graph + exec_graph = wf.execution_graph(submitter=self) # keep track of pending futures task_futures = set() - tasks, tasks_follow_errored = get_runnable_tasks(graph_copy) - while tasks or task_futures or graph_copy.nodes: + tasks = self.get_runnable_tasks(exec_graph) + while tasks or task_futures or any(not n.done for n in exec_graph.nodes): if not tasks and not task_futures: # it's possible that task_futures is empty, but not able to get any # tasks from graph_copy (using get_runnable_tasks) # this might be related to some delays saving the files - # so try to get_runnable_tasks for another minut + # so try to get_runnable_tasks for another minute ii = 0 - while not tasks and graph_copy.nodes: - tasks, follow_err = get_runnable_tasks(graph_copy) + while not tasks and exec_graph.nodes: + tasks, follow_err = self.get_runnable_tasks(exec_graph) ii += 1 # don't block the event loop! 
await asyncio.sleep(1) @@ -189,11 +217,11 @@ async def expand_workflow(self, wf, rerun=False): "results predecessors:\n\n" ) # Get blocked tasks and the predecessors they are waiting on - outstanding = { + outstanding: dict[Task, list[Task]] = { t: [ - p for p in graph_copy.predecessors[t.name] if not p.done + p for p in exec_graph.predecessors[t.name] if not p.done ] - for t in graph_copy.sorted_nodes + for t in exec_graph.sorted_nodes } hashes_have_changed = False @@ -236,25 +264,18 @@ async def expand_workflow(self, wf, rerun=False): # grab inputs if needed logger.debug(f"Retrieving inputs for {task}") # TODO: add state idx to retrieve values to reduce waiting - task.inputs.retrieve_values(wf) + task.definition._retrieve_values(wf) if task.state: - for fut in await self.expand_runnable(task, rerun=rerun): + for fut in await self.expand_runnable(task): task_futures.add(fut) # expand that workflow elif is_workflow(task): - await task._run(self, rerun=rerun) + await task.run(self) # single task else: - task_futures.add(self.worker.run_el(task, rerun=rerun)) + task_futures.add(self.worker.run(task, rerun=self.rerun)) task_futures = await self.worker.fetch_finished(task_futures) - tasks, follow_err = get_runnable_tasks(graph_copy) - # updating tasks_errored - for key, val in follow_err.items(): - tasks_follow_errored.setdefault(key, []) - tasks_follow_errored[key] += val - - for key, val in tasks_follow_errored.items(): - setattr(getattr(wf, key), "_errored", val) + tasks = self.get_runnable_tasks(exec_graph) return wf def __enter__(self): @@ -274,67 +295,223 @@ def close(self): if self._own_loop: self.loop.close() + def get_runnable_tasks( + self, + graph: DiGraph, + ) -> tuple[list["Task"], dict["NodeExecution", list[str]]]: + """Parse a graph and return all runnable tasks. 
-def get_runnable_tasks(graph):
-    """Parse a graph and return all runnable tasks."""
-    tasks = []
-    to_remove = []
-    # tasks that follow task that raises an error
-    following_err = dict()
-    for tsk in graph.sorted_nodes:
-        if tsk not in graph.sorted_nodes:
-            continue
-        # since the list is sorted (breadth-first) we can stop
-        # when we find a task that depends on any task that is already in tasks
-        if set(graph.predecessors[tsk.name]).intersection(set(tasks)):
-            break
-        _is_runnable = is_runnable(graph, tsk)
-        if _is_runnable is True:
-            tasks.append(tsk)
-            to_remove.append(tsk)
-        elif _is_runnable is False:
-            continue
-        else:  # a previous task had an error
-            errored_task = _is_runnable
-            # removing all successors of the errored task
-            for task_err in errored_task:
-                task_to_remove = graph.remove_successors_nodes(task_err)
-                for tsk in task_to_remove:
-                    # adding tasks that were removed from the graph
-                    # due to the error in the errored_task
-                    following_err.setdefault(tsk, [])
-                    following_err[tsk].append(task_err.name)
-
-    # removing tasks that are ready to run from the graph
-    for nd in to_remove:
-        graph.remove_nodes(nd)
-    return tasks, following_err
-
-
-def is_runnable(graph, obj):
-    """Check if a task within a graph is runnable."""
-    connections_to_remove = []
-    pred_errored = []
-    is_done = None
-    for pred in graph.predecessors[obj.name]:
+        Parameters
+        ----------
+        graph : :obj:`~pydra.engine.graph.DiGraph`
+            Graph object
+
+        Returns
+        -------
+        tasks : list of :obj:`~pydra.engine.core.Task`
+            List of runnable tasks
+        """
+        tasks = []
+        not_started = set()
+        node: NodeExecution
+        for node in graph.sorted_nodes:
+            if node.done:
+                continue
+            # since the list is sorted (breadth-first) we can stop
+            # when we find a task that depends on any task that is already in tasks
+            if set(graph.predecessors[node.name]).intersection(not_started):
+                break
+            # Record if the node has not been started
+            if not node.started:
+                not_started.add(node)
+            tasks.extend(node.get_runnable_tasks(graph))
+        return tasks
+
+    @property
+    def cache_dir(self):
+        """Get the location of the cache directory."""
+        return self._cache_dir
+
+    @cache_dir.setter
+    def cache_dir(self, location):
+        if location is not None:
+            self._cache_dir = Path(location).resolve()
+            self._cache_dir.mkdir(parents=False, exist_ok=True)
+        else:
+            self._cache_dir = mkdtemp()
+            self._cache_dir = Path(self._cache_dir).resolve()
+
+
+class NodeExecution:
+    """A wrapper around a workflow node containing the execution state of the tasks that
+    are generated from it"""
+
+    name: str
+    node: "Node"
+    submitter: Submitter
+
+    # Tasks that completed successfully, keyed by state index
+    successful: dict[StateIndex | None, list["Task"]]
+    # Tasks that failed, keyed by state index
+    errored: dict[StateIndex | None, "Task"]
+    # Tasks that couldn't be run due to upstream errors, keyed by state index
+    unrunnable: dict[StateIndex | None, list["Task"]]
+    # Tasks that are currently running, keyed by state index
+    running: dict[StateIndex | None, "Task"]
+    # Tasks that are waiting on other tasks to complete before they can be run,
+    # keyed by state index
+    waiting: dict[StateIndex | None, "Task"]
+
+    _tasks: dict[StateIndex | None, "Task"] | None
+
+    def __init__(self, node: "Node", submitter: Submitter):
+        self.name = node.name
+        self.node = node
+        self.submitter = submitter
+        # Initialize the state dictionaries
+        self._tasks = None
+        self.waiting = {}
+        self.successful = {}
+        self.errored = {}
+        self.running = {}
+        self.unrunnable = defaultdict(list)
+        self.state_names = self.node.state_names
+
+    def __getattr__(self, name: str) -> ty.Any:
+        """Delegate attribute access to the underlying node."""
+        return getattr(self.node, name)
+
+    @property
+    def tasks(self) -> ty.Iterable["Task"]:
+        if self._tasks is None:
+            self._tasks = {t.state_index: t for t in self._generate_tasks()}
+        return self._tasks.values()
+
+    def task(self, index: StateIndex | None = None) -> "Task":
+        """Get a task object for a given state index."""
+        self.tasks  # Ensure tasks are loaded
         try:
-            is_done = pred.done
-        except ValueError:
-            pred_errored.append(pred)
+            return self._tasks[index]
+        except KeyError:
+            if index is None:
+                raise KeyError(
+                    f"{self!r} has been split, so a state index must be provided"
+                ) from None
+            raise
+
+    @property
+    def started(self) -> bool:
+        return (
+            self.successful
+            or self.errored
+            or self.unrunnable
+            or self.running
+            or self.waiting
+        )
+
+    @property
+    def done(self) -> bool:
+        return self.started and not (self.running or self.waiting)
+
+    @property
+    def all_failed(self) -> bool:
+        return (self.unrunnable or self.errored) and not (
+            self.successful or self.waiting or self.running
+        )
+
+    def _generate_tasks(self) -> ty.Iterable["Task"]:
+        if self.node.state is None:
+            yield Task(
+                definition=self.node._definition,
+                submitter=self.submitter,
+                name=self.node.name,
+            )
+        else:
+            for index, split_defn in self.node._split_definition().items():
+                yield Task(
+                    definition=split_defn,
+                    submitter=self.submitter,
+                    name=self.node.name,
+                    state_index=index,
+                )
-        if is_done is True:
-            connections_to_remove.append(pred)
-        elif is_done is False:
-            return False
+        # if state_index is None:
+        #     # if state_index=None, collecting all results
+        #     if self.node.state.combiner:
+        #         return self._combined_output(return_inputs=return_inputs)
+        #     else:
+        #         results = []
+        #         for ind in range(len(self.node.state.inputs_ind)):
+        #             checksum = self.checksum_states(state_index=ind)
+        #             result = load_result(checksum, cache_locations)
+        #             if result is None:
+        #                 return None
+        #             results.append(result)
+        #         if return_inputs is True or return_inputs == "val":
+        #             return list(zip(self.node.state.states_val, results))
+        #         elif return_inputs == "ind":
+        #             return list(zip(self.node.state.states_ind, results))
+        #         else:
+        #             return results
+        # else:  # state_index is not None
+        #     if self.node.state.combiner:
+        #         return self._combined_output(return_inputs=return_inputs)[
+        #             state_index
+        #         ]
+        #     result = load_result(self.checksum_states(state_index), cache_locations)
+        #     if return_inputs is True or return_inputs == "val":
+        #         return (self.node.state.states_val[state_index], result)
+        #     elif return_inputs == "ind":
+        #         return (self.node.state.states_ind[state_index], result)
+        #     else:
+        #         return result
+        # else:
+        #     return load_result(self._definition._checksum, cache_locations)
+
+    def get_runnable_tasks(self, graph: DiGraph) -> list["Task"]:
+        """For a given node, check to see which tasks have been successfully run, are ready
+        to run, can't be run due to upstream errors, or are waiting on other tasks to complete.
-    if pred_errored:
-        return pred_errored
+        Parameters
+        ----------
+        graph : :obj:`~pydra.engine.graph.DiGraph`
+            Graph object
-    # removing nodes that are done from connections
-    for nd in connections_to_remove:
-        graph.remove_nodes_connections(nd)
-    return True
+
+        Returns
+        -------
+        runnable : list["Task"]
+            List of tasks that are ready to run
+        """
+        runnable: list["Task"] = []
+        if not self.started:
+            self.tasks  # ensure the task dictionary has been generated
+            self.waiting = copy(self._tasks)
+        # Check to see if any previously running tasks have completed
+        for index, task in list(self.running.items()):
+            if task.done:
+                self.successful[task.state_index] = self.running.pop(index)
+            elif task.errored:
+                self.errored[task.state_index] = self.running.pop(index)
+        # Check to see if any waiting tasks are now runnable/unrunnable
+        for index, task in list(self.waiting.items()):
+            pred: NodeExecution
+            is_runnable = True
+            for pred in graph.predecessors[self.node.name]:
+                if index not in pred.successful:
+                    is_runnable = False
+                    if index in pred.errored:
+                        self.unrunnable[index].append(self.waiting.pop(index))
+                    if index in pred.unrunnable:
+                        self.unrunnable[index].extend(pred.unrunnable[index])
+                        self.waiting.pop(index)
+                    break
+            if is_runnable:
+                runnable.append(self.waiting.pop(index))
+        self.running.update({t.state_index: t for t in runnable})
+        return runnable
 
 
 async def prepare_runnable(runnable):
diff --git a/pydra/engine/task.py b/pydra/engine/task.py
index f5eb253f31..bfea780803 100644
--- a/pydra/engine/task.py
+++ b/pydra/engine/task.py
@@ -211,7 +211,7 @@ def _prepare_bindings(self, root: str):
                     env_path / rel for rel in fileset.relative_fspaths
                 )
 
-    def resolve_value(
+    def resolve_output_value(
         self,
         fld: "shell.out",
         stdout: str,
@@ -277,7 +277,7 @@ def generated_output_names(self, stdout: str, stderr: str):
             # assuming that field should have either default or metadata, but not both
             if is_set(fld.default):
                 output_names.append(fld.name)
-            elif is_set(self.resolve_value(fld, stdout, stderr)):
+            elif is_set(self.resolve_output_value(fld, stdout, stderr)):
                 output_names.append(fld.name)
         return output_names
diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py
index 729307881d..a80089f5a5 100644
--- a/pydra/engine/tests/test_dockertask.py
+++ b/pydra/engine/tests/test_dockertask.py
@@ -34,7 +34,7 @@ def test_docker_1(plugin):
     cmd = "whoami"
     docky = shell.define(cmd)(environment=Docker(image="busybox"))
 
-    with Submitter(plugin=plugin) as sub:
+    with Submitter(worker=plugin) as sub:
         docky(submitter=sub)
         res = docky.result()
diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py
index 5d2aea0d01..64ba2831f8 100644
--- a/pydra/engine/tests/test_environments.py
+++ b/pydra/engine/tests/test_environments.py
@@ -38,7 +38,7 @@ def test_native_1(tmp_path):
     shelly_subm = ShellTask(
         name="shelly_subm", executable=cmd, cache_dir=newcache("shelly_subm")
     )
-    with Submitter(plugin="cf") as sub:
+    with Submitter(worker="cf") as sub:
         shelly_subm(submitter=sub, environment=Native())
     assert env_res == shelly_subm.result().output.__dict__
 
@@ -97,14 +97,14 @@ def test_docker_1_subm(tmp_path, docker):
         cache_dir=newcache("shelly_env"),
         environment=docker,
     )
-    with Submitter(plugin="cf") as sub:
+    with Submitter(worker="cf") as sub:
         shelly_env(submitter=sub)
     assert env_res == shelly_env.result().output.__dict__
 
     shelly_call = ShellTask(
         name="shelly", executable=cmd, cache_dir=newcache("shelly_call")
     )
-    with 
Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: shelly_call(submitter=sub, environment=docker) assert env_res == shelly_call.result().output.__dict__ @@ -155,14 +155,14 @@ def test_singularity_1_subm(tmp_path, plugin): cache_dir=newcache("shelly_env"), environment=sing, ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ shelly_call = ShellTask( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly_call(submitter=sub, environment=sing) for key in [ "stdout", @@ -299,14 +299,14 @@ def test_docker_fileinp_subm(tmp_path, plugin): tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] ) shelly_env.environment = docker - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ shelly_call = create_shelly_inputfile( tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly_call(submitter=sub, environment=docker) assert env_res == shelly_call.result().output.__dict__ diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 475d4a0ec7..21f008fabb 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -368,7 +368,7 @@ def test_task_nostate_1(plugin_dask_opt, tmp_path): assert np.allclose(nn.inputs.a, [3]) assert nn.state is None - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) # checking the results @@ -409,7 +409,7 @@ def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): assert np.allclose(nn.inputs.a, [3]) assert nn.state is None - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: nn(submitter=sub) # checking the results @@ -457,7 +457,7 @@ def test_task_nostate_2(plugin, tmp_path): assert np.allclose(nn.inputs.lst, [2, 3, 4]) assert nn.state is None - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -473,7 +473,7 @@ def test_task_nostate_3(plugin, tmp_path): nn.cache_dir = tmp_path assert nn.inputs.d == {"a": "ala", "b": "bala"} - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -560,7 +560,7 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): assert np.allclose(nn.inputs.a, [3]) assert nn.state is None - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) # checking the results @@ -579,7 +579,7 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): assert np.allclose(nn.inputs.a, [3]) assert nn.state is None - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) # checking the results @@ -601,11 +601,11 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(plugin=plugin_dask_opt) as sub: + with 
Submitter(worker=plugin_dask_opt) as sub: sub(nn2) # checking the results @@ -629,11 +629,11 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn2, rerun=True) # checking the results @@ -711,12 +711,12 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) # updating cache location to non-existing dir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn2, cache_locations=cache_dir1) # checking the results @@ -746,7 +746,7 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): assert nn.state.splitter_rpn == ["NA.a"] assert (nn.inputs.a == np.array([3, 5])).all() - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) # checking the results @@ -787,7 +787,7 @@ def test_task_state_1a(plugin, tmp_path): assert nn.state.splitter_rpn == ["NA.a"] assert (nn.inputs.a == np.array([3, 5])).all() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -811,7 +811,7 @@ def test_task_state_singl_1(plugin, tmp_path): assert nn.state.splitter_final == "NA.a" assert nn.state.splitter_rpn_final == ["NA.a"] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -881,7 +881,7 @@ def test_task_state_2( assert nn.state.splitter_final == state_splitter assert nn.state.splitter_rpn_final == state_rpn - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -918,7 +918,7 @@ def test_task_state_3(plugin, tmp_path): assert nn.state.splitter_rpn == ["NA.a"] assert nn.inputs.a == [] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -943,7 +943,7 @@ def test_task_state_4(plugin, input_type, tmp_path): assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) assert nn.state.splitter == "NA.lst" - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking that split is done across dim 0 @@ -972,7 +972,7 @@ def test_task_state_4a(plugin, tmp_path): assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) assert nn.state.splitter == "NA.lst" - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -996,7 +996,7 @@ def test_task_state_5(plugin, tmp_path): assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) assert nn.state.splitter == ("NA.n", "NA.lst") - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -1023,7 +1023,7 @@ def test_task_state_5_exception(plugin, tmp_path): assert nn.state.splitter == ("NA.n", "NA.lst") with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) assert "shape" in str(excinfo.value) @@ -1039,7 +1039,7 @@ def test_task_state_6(plugin, tmp_path): 
assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) assert nn.state.splitter == ["NA.n", "NA.lst"] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -1063,7 +1063,7 @@ def test_task_state_6a(plugin, tmp_path): assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) assert nn.state.splitter == ["NA.n", "NA.lst"] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -1090,7 +1090,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): assert nn.state.splitter_final is None assert nn.state.splitter_rpn_final == [] - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] @@ -1228,7 +1228,7 @@ def test_task_state_comb_2( assert nn.state.splitter_rpn == state_rpn assert nn.state.combiner == state_combiner - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) assert nn.state.splitter_final == state_splitter_final @@ -1275,7 +1275,7 @@ def test_task_state_comb_singl_1(plugin, tmp_path): assert nn.state.splitter_final is None assert nn.state.splitter_rpn_final == [] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -1299,7 +1299,7 @@ def test_task_state_comb_3(plugin, tmp_path): assert nn.state.splitter_rpn == ["NA.a"] assert nn.inputs.a == [] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) # checking the results @@ -1463,7 +1463,7 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): assert nn.state.splitter == "NA.a" assert (nn.inputs.a == np.array([3, 5])).all() - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) # checking the results @@ -1484,13 +1484,13 @@ def test_task_state_cachelocations(plugin, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) nn2 = fun_addtwo( name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir ).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn2) # checking the results @@ -1515,13 +1515,13 @@ def test_task_state_cachelocations_forcererun(plugin, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) nn2 = fun_addtwo( name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir ).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn2, rerun=True) # checking the results @@ -1550,13 +1550,13 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): cache_dir2.mkdir() nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn) nn2 = fun_addtwo(name="NA", cache_dir=cache_dir2, cache_locations=cache_dir).split( splitter="a", a=[3, 5] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(nn2, cache_locations=cache_dir1) # checking the results @@ -1588,13 +1588,13 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): input2.write_text("test") nn = 
fun_file(name="NA", filename=input1, cache_dir=cache_dir) - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn) nn2 = fun_file( name="NA", filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir ) - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(nn2) # checking the results diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index b0c998666a..e0ecb93dc6 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -28,7 +28,7 @@ def test_multiout(tmpdir): wf.set_output([("array", wf.mo.lzout.b)]) wf.cache_dir = tmpdir - with Submitter(plugin="cf", n_procs=2) as sub: + with Submitter(worker="cf", n_procs=2) as sub: sub(runnable=wf) results = wf.result(return_inputs=True) @@ -46,7 +46,7 @@ def test_multiout_st(tmpdir): wf.set_output([("array", wf.mo.lzout.b)]) wf.cache_dir = tmpdir - with Submitter(plugin="cf", n_procs=2) as sub: + with Submitter(worker="cf", n_procs=2) as sub: sub(runnable=wf) results = wf.result(return_inputs=True) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index b8ee5494a6..2d8362c7f2 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -265,7 +265,7 @@ def test_wf_shell_cmd_1(plugin, tmp_path): wf.set_output([("out", wf.shelly_ls.lzout.stdout)]) wf.cache_dir = tmp_path - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -1597,7 +1597,7 @@ def test_shell_cmd_inputspec_11(tmp_path): # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). 
Same error occurs in test_shell_cmd_outputspec_7a # see https://github.com/nipype/pydra/issues/671 - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as sub: sub(wf) result = wf.result() @@ -2212,7 +2212,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: wf(submitter=sub) res = wf.result() @@ -2259,7 +2259,7 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -2352,7 +2352,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -2449,7 +2449,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -2544,7 +2544,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res_l = wf.result() @@ -2641,7 +2641,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): ] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -2709,7 +2709,7 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): ) with pytest.raises(Exception) as exinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly(submitter=sub) assert "does not exist" in str(exinfo.value) @@ -2751,7 +2751,7 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): ) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shelly(submitter=sub) assert "no file matches" in str(excinfo.value) @@ -3388,7 +3388,7 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): [("stdout", wf.shelly.lzout.stdout), ("newfile", wf.shelly.lzout.newfile)] ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index a1f72d7b43..afde5397e8 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -80,7 +80,7 @@ def test_singularity_2(plugin, tmp_path): ) assert singu.cmdline == " ".join(cmd) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: singu(submitter=sub) res = singu.result() assert res.output.stdout.strip() == " ".join(cmd[1:]) @@ -105,7 +105,7 @@ def test_singularity_2a(plugin, tmp_path): ) assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: singu(submitter=sub) res = singu.result() assert res.output.stdout.strip() == " ".join(cmd_args) @@ -182,7 +182,7 @@ def test_singularity_outputspec_1(plugin, tmp_path): cache_dir=tmp_path, ) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: singu(submitter=sub) res = singu.result() @@ -662,7 +662,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): wf.set_output([("out", wf.singu.lzout.stdout)]) - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as 
sub: wf(submitter=sub) res = wf.result() @@ -718,7 +718,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): wf.set_output([("out", wf.singu.lzout.stdout)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() @@ -774,7 +774,7 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): wf.set_output([("out", wf.singu.lzout.stdout)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) res = wf.result() diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 63a5c8cf3e..d7dbb6ad03 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -149,7 +149,7 @@ def test_wf2(plugin_dask_opt, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(wf) res = wf.result() @@ -166,7 +166,7 @@ def test_wf_with_state(plugin_dask_opt, tmpdir): wf.set_output([("out", wf.taskb.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(wf) res = wf.result() @@ -702,7 +702,7 @@ def test_byo_worker(): task1 = add_env_var_task(x=1) - with Submitter(plugin=BYOAddVarWorker, add_var=10) as sub: + with Submitter(worker=BYOAddVarWorker, add_var=10) as sub: assert sub.plugin == "byo_add_env_var" result = task1(submitter=sub) @@ -710,7 +710,7 @@ def test_byo_worker(): task2 = add_env_var_task(x=2) - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as sub: result = task2(submitter=sub) assert result.output.out == 2 @@ -719,7 +719,7 @@ def test_byo_worker(): def test_bad_builtin_worker(): with pytest.raises(NotImplementedError, match="No worker for 'bad-worker' plugin"): - Submitter(plugin="bad-worker") + Submitter(worker="bad-worker") def test_bad_byo_worker(): @@ -730,4 +730,4 @@ class BadWorker: with pytest.raises( ValueError, match="Worker class must have a 'plugin_name' str attribute" ): - Submitter(plugin=BadWorker) + Submitter(worker=BadWorker) diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index 2ed98a1388..daf846b312 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -70,7 +70,7 @@ def test_task_1(tmpdir): np.save(file, arr) nn = file_add2(name="add2", file=file) - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) # checking the results @@ -93,7 +93,7 @@ def test_wf_1(tmpdir): np.save(file_orig, arr) wf.inputs.file_orig = file_orig - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(wf) assert wf.output_dir.exists() @@ -113,7 +113,7 @@ def test_file_annotation_1(tmpdir): np.save(file, arr) nn = file_add2_annot(name="add2", file=file) - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) # checking the results @@ -129,7 +129,7 @@ def test_broken_file(tmpdir): nn = file_add2(name="add2", file=file) with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) with pytest.raises(FileNotFoundError, match="do not exist"): @@ -153,7 +153,7 @@ def test_broken_file_link(tmpdir): # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: + with 
Submitter(worker="cf") as sub: sub(nn) with pytest.raises(FileNotFoundError, match="do not exist"): @@ -168,7 +168,7 @@ def test_broken_dir(): # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) # raises error before task is run @@ -190,7 +190,7 @@ def test_broken_dir_link1(tmpdir): nn = dir_count_file(name="listdir", dirpath=Path(dir1)) # raises error while running task with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) with pytest.raises(FileNotFoundError): @@ -212,9 +212,9 @@ def test_broken_dir_link2(tmpdir): nn = dir_count_file(name="listdir", dirpath=dir2) # does not raises error because pydra treats dirpath as a string - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn) nn2 = dir_count_file_annot(name="listdir", dirpath=str(dir2)) - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(nn2) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 39f005ccec..8c226d0f98 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -143,7 +143,7 @@ def test_wf_no_output(plugin, tmpdir): wf.inputs.x = 2 with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "Workflow output cannot be None" in str(excinfo.value) @@ -157,7 +157,7 @@ def test_wf_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -177,7 +177,7 @@ def test_wf_1a_outpastuple(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -193,7 +193,7 @@ def test_wf_1_call_subm(plugin, tmpdir): wf.inputs.x = 2 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) results = wf.result() @@ -240,7 +240,7 @@ def test_wf_1_call_exception(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: with pytest.raises(Exception) as e: wf(submitter=sub, plugin=plugin) assert "Defify submitter OR plugin" in str(e.value) @@ -276,7 +276,7 @@ def test_wf_2(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -298,7 +298,7 @@ def test_wf_2a(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -321,7 +321,7 @@ def test_wf_2b(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -345,7 +345,7 @@ def test_wf_2c_multoutp(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -370,7 +370,7 @@ def test_wf_2d_outpasdict(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: 
sub(wf) results = wf.result() @@ -391,7 +391,7 @@ def test_wf_3(plugin_dask_opt, tmpdir): wf.inputs.y = None wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(wf) assert wf.output_dir.exists() @@ -414,7 +414,7 @@ def test_wf_3a_exception(plugin, tmpdir): wf.cache_dir = tmpdir with pytest.raises(TypeError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "unsupported" in str(excinfo.value) @@ -428,7 +428,7 @@ def test_wf_4(plugin, tmpdir): wf.inputs.x = 2 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -448,7 +448,7 @@ def test_wf_4a(plugin, tmpdir): wf.inputs.x = 2 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -465,7 +465,7 @@ def test_wf_5(plugin, tmpdir): wf.set_output([("out_sum", wf.addsub.lzout.sum), ("out_sub", wf.addsub.lzout.sub)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -483,7 +483,7 @@ def test_wf_5a(plugin, tmpdir): wf.set_output([("out_sub", wf.addsub.lzout.sub)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -512,7 +512,7 @@ def test_wf_6(plugin, tmpdir): wf.set_output([("out1", wf.mult.lzout.out), ("out2", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -532,7 +532,7 @@ def test_wf_6a(plugin, tmpdir): wf.set_output([("out2", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -551,7 +551,7 @@ def test_wf_st_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as sub: sub(wf) assert wf.checksum == checksum_before @@ -574,7 +574,7 @@ def test_wf_st_1_call_subm(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: wf(submitter=sub) results = wf.result() @@ -691,7 +691,7 @@ def test_wf_st_noinput_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -710,7 +710,7 @@ def test_wf_ndst_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -731,7 +731,7 @@ def test_wf_ndst_updatespl_1(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -753,7 +753,7 @@ def test_wf_ndst_updatespl_1a(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -777,7 +777,7 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as 
sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -796,7 +796,7 @@ def test_wf_ndst_noinput_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -815,7 +815,7 @@ def test_wf_st_2(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -836,7 +836,7 @@ def test_wf_ndst_2(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -857,7 +857,7 @@ def test_wf_st_3(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) expected = [ @@ -903,7 +903,7 @@ def test_wf_ndst_3(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -924,7 +924,7 @@ def test_wf_st_4(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -950,7 +950,7 @@ def test_wf_ndst_4(plugin, tmpdir): wf.inputs.a = [1, 2] wf.inputs.b = [11, 12] - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -972,7 +972,7 @@ def test_wf_st_5(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -996,7 +996,7 @@ def test_wf_ndst_5(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1019,7 +1019,7 @@ def test_wf_st_6(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1045,7 +1045,7 @@ def test_wf_ndst_6(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1066,7 +1066,7 @@ def test_wf_ndst_7(plugin, tmpdir): wf.set_output([("out", wf.iden.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1088,7 +1088,7 @@ def test_wf_ndst_8(plugin, tmpdir): wf.set_output([("out", wf.iden.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1113,7 +1113,7 @@ def test_wf_ndst_9(plugin, tmpdir): wf.set_output([("out", wf.iden.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1137,7 +1137,7 @@ def test_wf_3sernd_ndst_1(plugin, tmpdir): wf.set_output([("out", wf.add2_2nd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # splitter from the first 
task should propagate to all tasks, @@ -1176,7 +1176,7 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): wf.set_output([("out", wf.add2_2nd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # splitter from the 1st task should propagate and the 2nd task should add one more @@ -1216,7 +1216,7 @@ def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(wf) results = wf.result() @@ -1244,7 +1244,7 @@ def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(worker=plugin_dask_opt) as sub: sub(wf) results = wf.result() @@ -1267,7 +1267,7 @@ def test_wf_3nd_st_2(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1301,7 +1301,7 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as sub: sub(wf) results = wf.result() @@ -1325,7 +1325,7 @@ def test_wf_3nd_st_3(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1359,7 +1359,7 @@ def test_wf_3nd_ndst_3(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1384,7 +1384,7 @@ def test_wf_3nd_st_4(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1418,7 +1418,7 @@ def test_wf_3nd_ndst_4(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -1447,7 +1447,7 @@ def test_wf_3nd_st_5(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1486,7 +1486,7 @@ def test_wf_3nd_ndst_5(plugin, tmpdir): wf.set_output([("out", wf.addvar.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1517,7 +1517,7 @@ def test_wf_3nd_ndst_6(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1542,7 +1542,7 @@ def test_wf_3nd_ndst_7(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1586,7 +1586,7 @@ def test_wf_3nd_8(tmpdir): ] ) - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(wf) res = wf.result() @@ -1615,7 +1615,7 @@ def test_wf_ndstLR_1(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + 
with Submitter(worker=plugin) as sub: sub(wf) # checking if the splitter is created properly @@ -1645,7 +1645,7 @@ def test_wf_ndstLR_1a(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # checking if the splitter is created properly @@ -1678,7 +1678,7 @@ def test_wf_ndstLR_2(plugin, tmpdir): wf.set_output([("out", wf.addvar.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # checking if the splitter is created properly @@ -1727,7 +1727,7 @@ def test_wf_ndstLR_2a(plugin, tmpdir): wf.set_output([("out", wf.addvar.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # checking if the splitter is created properly @@ -1772,7 +1772,7 @@ def test_wf_ndstinner_1(plugin, tmpdir): wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.add2.state.splitter == "add2.x" @@ -1797,7 +1797,7 @@ def test_wf_ndstinner_2(plugin, tmpdir): wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.mult.state.splitter == "mult.x" @@ -1822,7 +1822,7 @@ def test_wf_ndstinner_3(plugin, tmpdir): wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.mult.state.splitter == ["mult.x", "mult.y"] @@ -1849,7 +1849,7 @@ def test_wf_ndstinner_4(plugin, tmpdir): wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.mult.state.splitter == "mult.x" @@ -1888,7 +1888,7 @@ def test_wf_ndstinner_5(plugin, tmpdir): ) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.mult.state.splitter == ["_list", ["mult.y", "mult.x"]] @@ -1964,7 +1964,7 @@ def test_wf_st_singl_1(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -1988,7 +1988,7 @@ def test_wf_ndst_singl_1(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2011,7 +2011,7 @@ def test_wf_st_singl_2(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2039,7 +2039,7 @@ def test_wf_ndst_singl_2(plugin, tmpdir): wf.set_output([("out", wf.mult.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2066,7 +2066,7 @@ def test_wfasnd_1(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2091,7 +2091,7 @@ def test_wfasnd_wfinp_1(plugin, tmpdir): 
wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -2117,7 +2117,7 @@ def test_wfasnd_wfndupdate(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2136,7 +2136,7 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): wfnd.add(add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wfnd.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wfnd) wf = Workflow(name="wf", input_spec=["x"], x=3) @@ -2148,7 +2148,7 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2162,7 +2162,7 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): wf_o.set_output([("out", wf_o.wf.lzout.out)]) wf_o.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf_o) results = wf_o.result() @@ -2186,7 +2186,7 @@ def test_wfasnd_st_1(plugin, tmpdir): wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.checksum == checksum_before @@ -2211,7 +2211,7 @@ def test_wfasnd_st_updatespl_1(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2237,7 +2237,7 @@ def test_wfasnd_ndst_1(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2261,7 +2261,7 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2284,7 +2284,7 @@ def test_wfasnd_wfst_1(plugin, tmpdir): wf.split("x", x=[2, 4]) wf.set_output([("out", wf.wfnd.lzout.out)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2315,7 +2315,7 @@ def test_wfasnd_st_2(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2340,7 +2340,7 @@ def test_wfasnd_wfst_2(plugin, tmpdir): wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2373,7 +2373,7 @@ def test_wfasnd_ndst_3(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin="serial") as sub: + with Submitter(worker="serial") as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2400,7 +2400,7 @@ def test_wfasnd_wfst_3(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ 
-2430,7 +2430,7 @@ def test_wfasnd_4(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2455,7 +2455,7 @@ def test_wfasnd_ndst_4(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -2479,7 +2479,7 @@ def test_wfasnd_wfst_4(plugin, tmpdir): wf.split("x", x=[2, 4]) wf.set_output([("out", wf.wfnd.lzout.out)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2506,7 +2506,7 @@ def test_wf_nostate_cachedir(plugin, tmpdir): wf.inputs.x = 2 wf.inputs.y = 3 - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -2530,7 +2530,7 @@ def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): wf.inputs.x = 2 wf.inputs.y = 3 - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -2557,7 +2557,7 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): wf1.inputs.y = 3 t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2577,7 +2577,7 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): wf2.inputs.y = 3 t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2614,7 +2614,7 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2635,7 +2635,7 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2674,7 +2674,7 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2697,7 +2697,7 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2735,7 +2735,7 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2756,7 +2756,7 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2792,7 +2792,7 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2813,7 +2813,7 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2850,7 +2850,7 @@ def test_wf_nostate_cachelocations_forcererun(plugin, 
tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2871,7 +2871,7 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -2908,7 +2908,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2930,7 +2930,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -2971,7 +2971,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -2994,7 +2994,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3035,7 +3035,7 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3059,7 +3059,7 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3096,7 +3096,7 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): wf1.inputs.x = 3 wf1.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) results1 = wf1.result() @@ -3114,7 +3114,7 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): wf2.inputs.x = 2 wf2.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) results2 = wf2.result() @@ -3145,7 +3145,7 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): wf1.inputs.x = 3 wf1.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) results1 = wf1.result() @@ -3160,7 +3160,7 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): # updating cache_locations after adding the tasks wf2.cache_locations = cache_dir1 - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) results2 = wf2.result() @@ -3191,7 +3191,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3212,7 +3212,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3255,7 +3255,7 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3276,7 +3276,7 
@@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -3320,7 +3320,7 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3342,7 +3342,7 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): wf2.mult.inputs.y = wf2.lzin.y t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3384,7 +3384,7 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): wf1.inputs.y = 3 wf1.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) results1 = wf1.result() @@ -3402,7 +3402,7 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) results2 = wf2.result() @@ -3436,7 +3436,7 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3458,7 +3458,7 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): t0 = time.time() # changing cache_locations to non-existing dir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2, cache_locations=cache_dir1_empty) t2 = time.time() - t0 @@ -3494,7 +3494,7 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): wf1.inputs.y = 3 wf1.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) results1 = wf1.result() @@ -3514,7 +3514,7 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): wf2.inputs.y = 3 wf2.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) results2 = wf2.result() @@ -3549,7 +3549,7 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3572,7 +3572,7 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3614,7 +3614,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3637,7 +3637,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -3677,7 +3677,7 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3699,7 +3699,7 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = 
time.time() - t0 @@ -3740,7 +3740,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3763,7 +3763,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf2) t2 = time.time() - t0 @@ -3801,7 +3801,7 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3815,7 +3815,7 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t2 = time.time() - t0 @@ -3846,7 +3846,7 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t1 = time.time() - t0 @@ -3862,7 +3862,7 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf1) t2 = time.time() - t0 @@ -3968,7 +3968,7 @@ def test_wf_lzoutall_1(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -3989,7 +3989,7 @@ def test_wf_lzoutall_1a(plugin, tmpdir): wf.inputs.y = 3 wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -4011,7 +4011,7 @@ def test_wf_lzoutall_st_1(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -4033,7 +4033,7 @@ def test_wf_lzoutall_st_1a(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -4062,7 +4062,7 @@ def test_wf_lzoutall_st_2(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -4094,7 +4094,7 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.output_dir.exists() @@ -4116,7 +4116,7 @@ def test_wf_resultfile_1(plugin, tmpdir): wf.plugin = plugin wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -4137,7 +4137,7 @@ def test_wf_resultfile_2(plugin, tmpdir): wf.plugin = plugin wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -4158,7 +4158,7 @@ def test_wf_resultfile_3(plugin, tmpdir): wf.plugin = plugin wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) results = wf.result() @@ -4182,7 +4182,7 @@ def test_wf_upstream_error1(plugin, tmpdir): 
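The change repeated throughout the test hunks above is mechanical: the plugin keyword of Submitter has been renamed to worker. A minimal sketch of the updated call pattern, assuming a previously parameterised workflow named wf (the name is illustrative only):

    from pydra.engine.submitter import Submitter

    # "cf" selects the default ConcurrentFutures worker; per the new signature a
    # Worker class or instance may be passed instead of the string abbreviation
    with Submitter(worker="cf") as sub:
        sub(wf)               # run, re-using cached results where available
        sub(wf, rerun=True)   # force re-computation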
wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4200,7 +4200,7 @@ def test_wf_upstream_error2(plugin, tmpdir): wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4220,7 +4220,7 @@ def test_wf_upstream_error3(plugin, tmpdir): wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4235,7 +4235,7 @@ def test_wf_upstream_error4(plugin, tmpdir): wf.set_output([("out", wf.addvar1.lzout.out)]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "raised an error" in str(excinfo.value) assert "addvar1" in str(excinfo.value) @@ -4254,7 +4254,7 @@ def test_wf_upstream_error5(plugin, tmpdir): wf_main.set_output([("out", wf_main.wf.lzout.wf_out)]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) @@ -4275,7 +4275,7 @@ def test_wf_upstream_error6(plugin, tmpdir): wf_main.set_output([("out", wf_main.wf.lzout.wf_out)]) with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) @@ -4296,7 +4296,7 @@ def test_wf_upstream_error7(plugin, tmpdir): wf.set_output([("out", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4318,7 +4318,7 @@ def test_wf_upstream_error7a(plugin, tmpdir): wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4340,7 +4340,7 @@ def test_wf_upstream_error7b(plugin, tmpdir): wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4359,7 +4359,7 @@ def test_wf_upstream_error8(plugin, tmpdir): wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addtwo.lzout.out)]) with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "addvar1" in str(excinfo.value) @@ -4386,7 +4386,7 @@ def test_wf_upstream_error9(plugin, tmpdir): wf.plugin = plugin with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4412,7 +4412,7 @@ def test_wf_upstream_error9a(plugin, 
tmpdir): wf.set_output([("out1", wf.addvar2.lzout.out)]) # , ("out2", wf.addtwo.lzout.out)]) wf.plugin = plugin - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert wf.err._errored is True assert wf.follow_err._errored == ["err"] @@ -4436,7 +4436,7 @@ def test_wf_upstream_error9b(plugin, tmpdir): wf.plugin = plugin with pytest.raises(ValueError) as excinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4846,7 +4846,7 @@ def printer(a): wf.set_output([("out1", wf.printer1.lzout.out)]) - with Submitter(plugin="cf", n_procs=6) as sub: + with Submitter(worker="cf", n_procs=6) as sub: sub(wf) res = wf.result() @@ -4900,7 +4900,7 @@ def one_arg_inner(start_number): test_outer.set_output([("res2", test_outer.test_inner.lzout.res)]) - with Submitter(plugin="cf") as sub: + with Submitter(worker="cf") as sub: sub(test_outer) res = test_outer.result() diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index b88dc43348..8d0435cb25 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -43,7 +43,7 @@ def result_submitter(shell_task, plugin): """helper function to return result when running with submitter with specific plugin """ - with Submitter(plugin=plugin) as sub: + with Submitter(worker=plugin) as sub: shell_task(submitter=sub) return shell_task.result() diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 84d71f70b3..67fbdfe211 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -20,7 +20,7 @@ ) import logging - +from pydra.engine.environments import Environment import random logger = logging.getLogger("pydra.worker") @@ -34,7 +34,7 @@ def __init__(self, loop=None): logger.debug(f"Initializing {self.__class__.__name__}") self.loop = loop - def run_el(self, interface, **kwargs): + def run(self, task: "Task", **kwargs): """Return coroutine for task execution.""" raise NotImplementedError @@ -134,18 +134,26 @@ def __init__(self, **kwargs): """Initialize worker.""" logger.debug("Initialize SerialWorker") - def run_el(self, interface, rerun=False, environment=None, **kwargs): + def run( + self, + task: "Task", + rerun: bool = False, + environment: Environment | None = None, + **kwargs, + ): """Run a task.""" - return self.exec_serial(interface, rerun=rerun, environment=environment) + return self.exec_serial(task, rerun=rerun, environment=environment) def close(self): """Return whether the task is finished.""" - async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, Task): - return runnable._run(rerun, environment=environment) + async def exec_serial( + self, task: "Task", rerun: bool = False, environment: Environment | None = None + ): + if isinstance(task, Task): + return task.run(rerun, environment=environment) else: # it could be tuple that includes pickle files with tasks and inputs - task_main_pkl, _ = runnable + task_main_pkl, _ = task return load_and_run(task_main_pkl, rerun, environment=environment) async def fetch_finished(self, futures): @@ -170,10 +178,16 @@ def __init__(self, n_procs=None): # self.loop = asyncio.get_event_loop() logger.debug("Initialize ConcurrentFuture") - def run_el(self, runnable, rerun=False, environment=None, **kwargs): + def run( + self, + task: "Task", + rerun: bool = False, + environment: Environment | None = None, + **kwargs, + ): """Run a task.""" assert self.loop, "No event loop 
available to submit tasks" - return self.exec_as_coro(runnable, rerun=rerun, environment=environment) + return self.exec_as_coro(task, rerun=rerun, environment=environment) async def exec_as_coro(self, runnable, rerun=False, environment=None): """Run a task (coroutine wrapper).""" @@ -182,9 +196,9 @@ async def exec_as_coro(self, runnable, rerun=False, environment=None): self.pool, runnable._run, rerun, environment ) else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, task_orig = runnable + task_main_pkl, task_orig = runnable res = await self.loop.run_in_executor( - self.pool, load_and_run, task_main_pkl, ind, rerun, environment + self.pool, load_and_run, task_main_pkl, rerun, environment ) return res @@ -223,19 +237,21 @@ def __init__(self, loop=None, max_jobs=None, poll_delay=1, sbatch_args=None): self.sbatch_args = sbatch_args or "" self.error = {} - def run_el(self, runnable, rerun=False, environment=None): + def run( + self, task: "Task", rerun: bool = False, environment: Environment | None = None + ): """Worker submission API.""" - script_dir, batch_script = self._prepare_runscripts(runnable, rerun=rerun) + script_dir, batch_script = self._prepare_runscripts(task, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, Task): - cache_dir = runnable.cache_dir - name = runnable.name - uid = runnable.uid + if isinstance(task, Task): + cache_dir = task.cache_dir + name = task.name + uid = task.uid else: # runnable is a tuple (ind, pkl file, task) - cache_dir = runnable[-1].cache_dir - name = runnable[-1].name - uid = f"{runnable[-1].uid}_{runnable[0]}" + cache_dir = task[-1].cache_dir + name = task[-1].name + uid = f"{task[-1].uid}_{task[0]}" return self._submit_job(batch_script, name=name, uid=uid, cache_dir=cache_dir) @@ -453,7 +469,9 @@ def __init__( self.default_qsub_args = default_qsub_args self.max_mem_free = max_mem_free - def run_el(self, runnable, rerun=False): # TODO: add env + def run( + self, task: "Task", rerun: bool = False, environment: Environment | None = None + ): # TODO: add env """Worker submission API.""" ( script_dir, @@ -462,17 +480,17 @@ def run_el(self, runnable, rerun=False): # TODO: add env ind, output_dir, task_qsub_args, - ) = self._prepare_runscripts(runnable, rerun=rerun) + ) = self._prepare_runscripts(task, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): logger.warning("Temporary directories may not be shared across computers") - if isinstance(runnable, Task): - cache_dir = runnable.cache_dir - name = runnable.name - uid = runnable.uid + if isinstance(task, Task): + cache_dir = task.cache_dir + name = task.name + uid = task.uid else: # runnable is a tuple (ind, pkl file, task) - cache_dir = runnable[-1].cache_dir - name = runnable[-1].name - uid = f"{runnable[-1].uid}_{runnable[0]}" + cache_dir = task[-1].cache_dir + name = task[-1].name + uid = f"{task[-1].uid}_{task[0]}" return self._submit_job( batch_script, @@ -881,20 +899,28 @@ def __init__(self, **kwargs): self.client_args = kwargs logger.debug("Initialize Dask") - def run_el(self, runnable, rerun=False, **kwargs): + def run( + self, + task: "Task", + rerun: bool = False, + environment: Environment | None = None, + **kwargs, + ): """Run a task.""" - return self.exec_dask(runnable, rerun=rerun) + return self.exec_dask(task, rerun=rerun, environment=environment) - async def exec_dask(self, runnable, rerun=False): + async def 
exec_dask( + self, task: "Task", rerun: bool = False, environment: Environment | None = None + ): """Run a task (coroutine wrapper).""" from dask.distributed import Client async with Client(**self.client_args, asynchronous=True) as client: - if isinstance(runnable, Task): - future = client.submit(runnable._run, rerun) + if isinstance(task, Task): + future = client.submit(task._run, rerun) result = await future else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, task_orig = runnable + ind, task_main_pkl, task_orig = task future = client.submit(load_and_run, task_main_pkl, ind, rerun) result = await future return result @@ -924,9 +950,15 @@ def __init__(self, **kwargs): logger.debug("Initialize PsijWorker") self.psij = psij - def run_el(self, interface, rerun=False, **kwargs): + def run( + self, + task: "Task", + rerun: bool = False, + environment: Environment | None = None, + **kwargs, + ): """Run a task.""" - return self.exec_psij(interface, rerun=rerun) + return self.exec_psij(task, rerun=rerun, environment=environment) def make_spec(self, cmd=None, arg=None): """ @@ -970,7 +1002,9 @@ def make_job(self, definition, attributes): job.definition = definition return job - async def exec_psij(self, runnable, rerun=False): + async def exec_psij( + self, task: "Task", rerun: bool = False, environment: Environment | None = None + ): """ Run a task (coroutine wrapper). @@ -989,18 +1023,18 @@ async def exec_psij(self, runnable, rerun=False): jex = self.psij.JobExecutor.get_instance(self.subtype) absolute_path = Path(__file__).parent - if isinstance(runnable, Task): - cache_dir = runnable.cache_dir + if isinstance(task, Task): + cache_dir = task.cache_dir file_path = cache_dir / "runnable_function.pkl" with open(file_path, "wb") as file: - pickle.dump(runnable._run, file) + pickle.dump(task._run, file) func_path = absolute_path / "run_pickled.py" definition = self.make_spec("python", [func_path, file_path]) else: # it could be tuple that includes pickle files with tasks and inputs - cache_dir = runnable[-1].cache_dir + cache_dir = task[-1].cache_dir file_path_1 = cache_dir / "taskmain.pkl" file_path_2 = cache_dir / "ind.pkl" - ind, task_main_pkl, task_orig = runnable + ind, task_main_pkl, task_orig = task with open(file_path_1, "wb") as file: pickle.dump(task_main_pkl, file) with open(file_path_2, "wb") as file: diff --git a/pydra/engine/workflow/__init__.py b/pydra/engine/workflow/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/pydra/engine/workflow/base.py b/pydra/engine/workflow/base.py deleted file mode 100644 index 1851d526b5..0000000000 --- a/pydra/engine/workflow/base.py +++ /dev/null @@ -1,184 +0,0 @@ -import typing as ty -from copy import copy -from operator import itemgetter -from typing_extensions import Self -import attrs -from pydra.engine.helpers import list_fields, attrs_values, is_lazy -from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowOutputs -from .lazy import LazyInField -from pydra.utils.hash import hash_function -from pydra.utils.typing import TypeParser, StateArray -from .node import Node - - -OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) -WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) - - -@attrs.define(auto_attribs=False) -class Workflow(ty.Generic[WorkflowOutputsType]): - """A workflow, constructed from a workflow definition - - Parameters - ---------- - name : str - The name of the workflow - inputs : TaskDef - The input definition of the workflow - 
outputs : TaskDef - The output definition of the workflow - """ - - name: str = attrs.field() - inputs: TaskDef[WorkflowOutputsType] = attrs.field() - outputs: WorkflowOutputsType = attrs.field() - _nodes: dict[str, Node] = attrs.field(factory=dict) - - @classmethod - def construct( - cls, - definition: TaskDef[WorkflowOutputsType], - ) -> Self: - """Construct a workflow from a definition, caching the constructed worklow""" - - lazy_inputs = [f for f in list_fields(type(definition)) if f.lazy] - - # Create a cache key by hashing all the non-lazy input values in the definition - # and use this to store the constructed workflow in case it is reused or nested - # and split over within another workflow - lazy_input_names = {f.name for f in lazy_inputs} - non_lazy_vals = tuple( - sorted( - ( - i - for i in attrs_values(definition).items() - if i[0] not in lazy_input_names - ), - key=itemgetter(0), - ) - ) - if lazy_non_lazy_vals := [f for f in non_lazy_vals if is_lazy(f[1])]: - raise ValueError( - f"Lazy input fields {lazy_non_lazy_vals} found in non-lazy fields " - ) - hash_key = hash_function(non_lazy_vals) - if hash_key in cls._constructed: - return cls._constructed[hash_key] - - # Initialise the outputs of the workflow - outputs = definition.Outputs( - **{f.name: attrs.NOTHING for f in attrs.fields(definition.Outputs)} - ) - - # Initialise the lzin fields - lazy_spec = copy(definition) - wf = cls.under_construction = Workflow( - name=type(definition).__name__, - inputs=lazy_spec, - outputs=outputs, - ) - for lzy_inpt in lazy_inputs: - setattr( - lazy_spec, - lzy_inpt.name, - LazyInField( - workflow=wf, - field=lzy_inpt.name, - type=lzy_inpt.type, - ), - ) - - input_values = attrs_values(lazy_spec) - constructor = input_values.pop("constructor") - cls._under_construction = wf - try: - # Call the user defined constructor to set the outputs - output_lazy_fields = constructor(**input_values) - # Check to see whether any mandatory inputs are not set - for node in wf.nodes: - node._spec._check_rules() - # Check that the outputs are set correctly, either directly by the constructor - # or via returned values that can be zipped with the output names - if output_lazy_fields: - if not isinstance(output_lazy_fields, (list, tuple)): - output_lazy_fields = [output_lazy_fields] - output_fields = list_fields(definition.Outputs) - if len(output_lazy_fields) != len(output_fields): - raise ValueError( - f"Expected {len(output_fields)} outputs, got " - f"{len(output_lazy_fields)} ({output_lazy_fields})" - ) - for outpt, outpt_lf in zip(output_fields, output_lazy_fields): - # Automatically combine any uncombined state arrays into lists - if TypeParser.get_origin(outpt_lf.type) is StateArray: - outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] - setattr(outputs, outpt.name, outpt_lf) - else: - if unset_outputs := [ - a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING - ]: - raise ValueError( - f"Expected outputs {unset_outputs} to be set by the " - f"constructor of {wf!r}" - ) - finally: - cls._under_construction = None - - cls._constructed[hash_key] = wf - - return wf - - @classmethod - def clear_cache(cls): - """Clear the cache of constructed workflows""" - cls._constructed.clear() - - def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: - """Add a node to the workflow - - Parameters - ---------- - task_spec : TaskDef - The definition of the task to add to the workflow as a node - name : str, optional - The name of the node, by default it will be the name of 
the task definition - class - - Returns - ------- - OutputType - The outputs definition of the node - """ - if name is None: - name = type(task_spec).__name__ - if name in self._nodes: - raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutputsType](name=name, definition=task_spec, workflow=self) - self._nodes[name] = node - return node.lzout - - def __getitem__(self, key: str) -> Node: - return self._nodes[key] - - @property - def nodes(self) -> ty.Iterable[Node]: - return self._nodes.values() - - @property - def node_names(self) -> list[str]: - return list(self._nodes) - - @property - @classmethod - def under_construction(cls) -> "Workflow[ty.Any]": - if cls._under_construction is None: - raise ValueError( - "pydra.design.workflow.this() can only be called from within a workflow " - "constructor function (see 'pydra.design.workflow.define')" - ) - return cls._under_construction - - # Used to store the workflow that is currently being constructed - _under_construction: "Workflow[ty.Any]" = None - # Used to cache the constructed workflows by their hashed input values - _constructed: dict[int, "Workflow[ty.Any]"] = {} From 5cbb4df12a99d911297f270bb48c3582508db318 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 18 Jan 2025 10:21:57 +1100 Subject: [PATCH 136/342] added section on workers --- .../tutorial/2-advanced-execution.ipynb | 48 +++++++++++++++++-- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index c23ddd2da4..d9d15b17c7 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -24,10 +24,52 @@ "\n", "- `ConcurrentFutures`\n", "- `SLURM`\n", + "- `SGE`\n", "- `Dask` (experimental)\n", "- `Serial` (for debugging)\n", "\n", - "Work in progress..." + "By default, the *ConcurrentFutures* worker (abbreviated to \"cf\") will be used, which\n", + "divides tasks across multiple processes. If you are using a high-performance cluster (HPC)\n", + "then the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", + "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) workers can be used to submit each\n", + "workflow node as separate jobs to the HPC scheduler. When using a graphical debugger to\n", + "debug workflow or Python tasks, the *Serial* worker is recommended. 
There is also an\n", + "experimental [Dask](https://www.dask.org/) worker.\n", + "\n", + "To specify a worker, the abbreviation can be passed either as a string or using the\n", + "class itself" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "'typing.TypeVar' object is not subscriptable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdesign\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m python\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;129;43m@python\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefine\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;43;01mdef\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43mPower10\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m:\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mreturn\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mn\u001b[49m\n\u001b[1;32m 7\u001b[0m pow10 \u001b[38;5;241m=\u001b[39m Power10()\u001b[38;5;241m.\u001b[39msplit(n\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m])\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/design/python.py:124\u001b[0m, in \u001b[0;36mdefine\u001b[0;34m(wrapped, inputs, outputs, bases, outputs_bases, auto_attribs)\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;129m@dataclass_transform\u001b[39m(\n\u001b[1;32m 93\u001b[0m kw_only_default\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 94\u001b[0m field_specifiers\u001b[38;5;241m=\u001b[39m(arg,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 103\u001b[0m auto_attribs: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 104\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPythonDef\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 105\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m Create an interface for a function or a class.\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03m The task definition class for the Python function\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecs\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PythonDef, PythonOutputs\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mmake\u001b[39m(wrapped: ty\u001b[38;5;241m.\u001b[39mCallable \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mtype\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m PythonDef:\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misclass(wrapped):\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:1020\u001b[0m\n\u001b[1;32m 1010\u001b[0m argstr_formatted \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1011\u001b[0m argstr_formatted\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[ \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ]\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;241m.\u001b[39mstrip()\n\u001b[1;32m 1016\u001b[0m )\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m argstr_formatted\n\u001b[0;32m-> 1020\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m core \u001b[38;5;66;03m# noqa: E402\u001b[39;00m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:60\u001b[0m\n\u001b[1;32m 56\u001b[0m DefType \u001b[38;5;241m=\u001b[39m ty\u001b[38;5;241m.\u001b[39mTypeVar(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDefType\u001b[39m\u001b[38;5;124m\"\u001b[39m, bound\u001b[38;5;241m=\u001b[39mTaskDef)\n\u001b[1;32m 57\u001b[0m OutputsType \u001b[38;5;241m=\u001b[39m ty\u001b[38;5;241m.\u001b[39mTypeVar(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOutputType\u001b[39m\u001b[38;5;124m\"\u001b[39m, bound\u001b[38;5;241m=\u001b[39mTaskOutputs)\n\u001b[0;32m---> 60\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mTask\u001b[39;00m(ty\u001b[38;5;241m.\u001b[39mGeneric[\u001b[43mDefType\u001b[49m\u001b[43m[\u001b[49m\u001b[43mOutputsType\u001b[49m\u001b[43m]\u001b[49m]):\n\u001b[1;32m 61\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;124;03m A base structure for the nodes in the processing graph.\u001b[39;00m\n\u001b[1;32m 63\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 66\u001b[0m \n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m _api_version: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m0.0.1\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Should generally not be touched by subclasses\u001b[39;00m\n", + "\u001b[0;31mTypeError\u001b[0m: 'typing.TypeVar' object is not subscriptable" + ] + } + ], + "source": [ + "from pydra.design import python\n", + "\n", + "@python.define\n", + "def Power10(n: int) -> int:\n", + " return 10 ** n\n", + "\n", + "pow10 = Power10().split(n=[1, 2, 3, 4, 5])\n", + "\n", + "p1, p2, p3, p4, p5 = pow10(worker=\"serial\")" ] }, { @@ -68,7 +110,7 @@ "source": [ "from pathlib import Path\n", "import tempfile\n", - "from fileformats.medimage import Nifti\n", + "from fileformats.medimage import Nifti1\n", "from pydra.engine.submitter import Submitter\n", "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "import nest_asyncio\n", @@ -81,7 +123,7 @@ "nifti_dir = test_dir / \"nifti\"\n", 
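The markdown cell above notes that the worker can be given either by its string abbreviation or as the class itself. A minimal sketch of the class form, reusing the pow10 task from the cell above and assuming the serial worker class is exposed as pydra.engine.workers.SerialWorker (that import path is an assumption, not taken from this patch):

    # hedged sketch: pass the worker class rather than the "serial" abbreviation;
    # SerialWorker and its import path are assumed here for illustration only
    from pydra.engine.workers import SerialWorker

    p1, p2, p3, p4, p5 = pow10(worker=SerialWorker)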
"nifti_dir.mkdir()\n", "for i in range(10):\n", - " Nifti.sample(nifti_dir, seed=i)\n", + " Nifti1.sample(nifti_dir, seed=i)\n", "\n", "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", "\n", From f639b2185223d8a03565ea564e0fe68c7076c51a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 18 Jan 2025 10:26:05 +1100 Subject: [PATCH 137/342] renamed execution options --- new-docs/source/index.rst | 2 +- new-docs/source/tutorial/2-advanced-execution.ipynb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 39ad3065b3..dd16cde900 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -72,7 +72,7 @@ Execution Learn how to execute existing tasks (including workflows) on different systems * :ref:`Getting started` -* :ref:`Advanced execution` +* :ref:`Execution options` Design ~~~~~~ diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index d9d15b17c7..5cdeaddc60 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Advanced Execution\n", + "# Execution options\n", "\n", "One of the key design features of Pydra is the separation between the parameterisation of\n", "the task to be executed, and the parameresiation of where and how the task should be\n", From 69b72c9c0192c03db6adb070c072aab8c328128d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 21 Jan 2025 17:47:24 +1100 Subject: [PATCH 138/342] debugging workflow and shell architecture refactoring --- new-docs/source/tutorial/tst.py | 14 +- new-docs/tst.py | 2 +- pydra/design/base.py | 27 ++- pydra/design/workflow.py | 6 +- pydra/engine/core.py | 252 ++++++++++--------- pydra/engine/environments.py | 22 +- pydra/engine/helpers.py | 38 ++- pydra/engine/node.py | 12 +- pydra/engine/specs.py | 347 +++++++++++++++++++-------- pydra/engine/state.py | 101 +++++--- pydra/engine/submitter.py | 212 +++++++++------- pydra/engine/task.py | 137 +---------- pydra/engine/tests/test_shelltask.py | 20 +- pydra/engine/workers.py | 19 +- 14 files changed, 656 insertions(+), 553 deletions(-) diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index b98e22c274..01cd176b4d 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,26 +1,30 @@ from pathlib import Path from tempfile import mkdtemp -from fileformats.medimage import Nifti +import shutil +from fileformats.medimage import Nifti1 from pydra.tasks.mrtrix3.v3_0 import MrGrid +from pydra.utils import user_cache_dir if __name__ == "__main__": test_dir = Path(mkdtemp()) + shutil.rmtree(user_cache_dir / "run-cache", ignore_errors=True) + nifti_dir = test_dir / "nifti" nifti_dir.mkdir() for i in range(10): - Nifti.sample( + Nifti1.sample( nifti_dir, seed=i ) # Create a dummy NIfTI file in the dest. 
directory # Instantiate the task definition, "splitting" over all NIfTI files in the test directory # by splitting the "input" input field over all files in the directory - mrgrid = MrGrid(voxel=(0.5, 0.5, 0.5)).split(input=nifti_dir.iterdir()) + mrgrid = MrGrid(voxel=(0.5, 0.5, 0.5)).split(in_file=nifti_dir.iterdir()) # Run the task to resample all NIfTI files - outputs = mrgrid() + outputs = mrgrid(worker="serial") # Print the locations of the output files - print("\n".join(str(p) for p in outputs.output)) + print("\n".join(str(p) for p in outputs.outputs)) diff --git a/new-docs/tst.py b/new-docs/tst.py index c9d11c8773..d3589ba074 100644 --- a/new-docs/tst.py +++ b/new-docs/tst.py @@ -11,4 +11,4 @@ result = load_json(plugin="serial") # Print the output interface of the of the task (LoadJson.Outputs) -print(result.output) +print(result.outputs) diff --git a/pydra/design/base.py b/pydra/design/base.py index c5fb817431..e2080225bc 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -388,7 +388,8 @@ def make_task_def( klass : type The class created using the attrs package """ - from pydra.engine.specs import TaskDef + from pydra.engine.specs import TaskDef, WorkflowDef + from pydra.engine.core import Task, WorkflowTask spec_type._check_arg_refs(inputs, outputs) @@ -399,6 +400,7 @@ def make_task_def( f"{reserved_names} are reserved and cannot be used for {spec_type} field names" ) outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) + task_type = WorkflowTask if issubclass(spec_type, WorkflowDef) else Task if klass is None or not issubclass(klass, spec_type): if name is None: raise ValueError("name must be provided if klass is not") @@ -417,13 +419,19 @@ def make_task_def( name=name, bases=bases, kwds={}, - exec_body=lambda ns: ns.update({"Outputs": outputs_klass}), + exec_body=lambda ns: ns.update( + { + "Outputs": outputs_klass, + "Task": task_type, + } + ), ) else: # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes klass.__annotations__ = copy(klass.__annotations__) klass.Outputs = outputs_klass + klass.Task = task_type # Now that we have saved the attributes in lists to be for arg in inputs.values(): # If an outarg input then the field type should be Path not a FileSet @@ -769,7 +777,11 @@ def extract_function_inputs_and_outputs( type_hints = ty.get_type_hints(function) input_types = {} input_defaults = {} + has_varargs = False for p in sig.parameters.values(): + if p.kind is p.VAR_POSITIONAL or p.kind is p.VAR_KEYWORD: + has_varargs = True + continue input_types[p.name] = type_hints.get(p.name, ty.Any) if p.default is not inspect.Parameter.empty: input_defaults[p.name] = p.default @@ -779,11 +791,12 @@ def extract_function_inputs_and_outputs( f"Input names ({inputs}) should not be provided when " "wrapping/decorating a function as " ) - if unrecognised := set(inputs) - set(input_types): - raise ValueError( - f"Unrecognised input names ({unrecognised}) not present in the signature " - f"of the function {function!r}" - ) + if not has_varargs: + if unrecognised := set(inputs) - set(input_types): + raise ValueError( + f"Unrecognised input names ({unrecognised}) not present in the signature " + f"of the function {function!r}" + ) for inpt_name, type_ in input_types.items(): try: inpt = inputs[inpt_name] diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 841eebf1c0..6865ed542a 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -170,7 +170,7 @@ def make(wrapped: 
ty.Callable | type) -> TaskDef: for inpt_name in lazy: parsed_inputs[inpt_name].lazy = True - interface = make_task_def( + defn = make_task_def( WorkflowDef, WorkflowOutputs, parsed_inputs, @@ -181,7 +181,7 @@ def make(wrapped: ty.Callable | type) -> TaskDef: outputs_bases=outputs_bases, ) - return interface + return defn if wrapped is not None: if not isinstance(wrapped, (ty.Callable, type)): @@ -198,7 +198,7 @@ def this() -> "Workflow": Workflow The workflow currently being constructed. """ - from pydra.engine.workflow.base import Workflow + from pydra.engine.core import Workflow return Workflow.under_construction diff --git a/pydra/engine/core.py b/pydra/engine/core.py index e1bae23c09..4db3f009d0 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -1,15 +1,12 @@ """Basic processing graph elements.""" -import abc import json import logging import os import sys from pathlib import Path import typing as ty -from copy import deepcopy from uuid import uuid4 -import inspect import shutil from traceback import format_exception import attr @@ -18,6 +15,7 @@ from operator import itemgetter from typing_extensions import Self import attrs +from filelock import SoftFileLock from pydra.engine.specs import TaskDef, WorkflowDef, TaskOutputs, WorkflowOutputs from pydra.engine.graph import DiGraph from pydra.engine import state @@ -35,14 +33,13 @@ create_checksum, attrs_fields, attrs_values, - print_help, load_result, save, - ensure_list, record_error, PydraFileLock, list_fields, is_lazy, + ensure_list, ) from .helpers_file import copy_nested_files, template_update from pydra.utils.messenger import AuditFlag @@ -115,27 +112,6 @@ def __init__( b. Gets killed -> restart 3. No cache or other process -> start 4. Two or more concurrent new processes get to start - - Parameters - ---------- - name : :obj:`str` - Unique name of this node - audit_flags : :class:`AuditFlag`, optional - Configure provenance tracking. Default is no provenance tracking. - See available flags at :class:`~pydra.utils.messenger.AuditFlag`. - cache_dir : :obj:`os.pathlike` - Set a custom directory of previously computed nodes. - cache_locations : - TODO - inputs : :obj:`typing.Text`, or :class:`File`, or :obj:`dict`, or `None`. - Set particular inputs to this node. - cont_dim : :obj:`dict`, or `None` - Container dimensions for input fields, - if any of the container should be treated as a container - messenger_args : - TODO - messengers : - TODO """ from . 
import check_latest_version @@ -147,7 +123,6 @@ def __init__( self.definition = definition self.name = name - self.submitter = submitter self.state_index = state_index # checking if metadata is set properly @@ -163,12 +138,35 @@ def __init__( self.allow_cache_override = True self._checksum = None self._uid = uuid4().hex - - self.plugin = None self.hooks = TaskHook() self._errored = False self._lzout = None + # Save the submitter attributes needed to run the task later + self.audit = submitter.audit + self.cache_dir = submitter.cache_dir + self.cache_locations = submitter.cache_locations + + @property + def cache_dir(self): + return self._cache_dir + + @cache_dir.setter + def cache_dir(self, path: os.PathLike): + self._cache_dir = Path(path) + + @property + def cache_locations(self): + """Get the list of cache sources.""" + return self._cache_locations + ensure_list(self.cache_dir) + + @cache_locations.setter + def cache_locations(self, locations): + if locations is not None: + self._cache_locations = [Path(loc) for loc in ensure_list(locations)] + else: + self._cache_locations = [] + def __str__(self): return self.name @@ -181,17 +179,6 @@ def __setstate__(self, state): state["definition"] = cp.loads(state["definition"]) self.__dict__.update(state) - def help(self, returnhelp=False): - """Print class help.""" - help_obj = print_help(self) - if returnhelp: - return help_obj - - @property - def version(self): - """Get version of this task structure.""" - return self._version - @property def errored(self): """Check if the task has raised an error""" @@ -204,10 +191,16 @@ def checksum(self): and to create nodes checksums needed for graph checksums (before the tasks have inputs etc.) """ + if self._checksum is not None: + return self._checksum input_hash = self.definition._hash self._checksum = create_checksum(self.__class__.__name__, input_hash) return self._checksum + @property + def lockfile(self): + return self.output_dir.with_suffix(".lock") + @property def uid(self): """the unique id number for the task @@ -239,12 +232,12 @@ def set_state(self, splitter, combiner=None): @property def output_names(self): """Get the names of the outputs from the task's output_spec - (not everything has to be generated, see generated_output_names). + (not everything has to be generated, see _generated_output_names). 
""" return [f.name for f in attr.fields(self.definition.Outputs)] @property - def generated_output_names(self): + def _generated_output_names(self): return self.output_names @property @@ -252,41 +245,10 @@ def can_resume(self): """Whether the task accepts checkpoint-restart.""" return self._can_resume - @abc.abstractmethod - def _run_task(self, environment=None): - pass - - @property - def cache_locations(self): - """Get the list of cache sources.""" - return self._cache_locations + ensure_list(self._cache_dir) - - @cache_locations.setter - def cache_locations(self, locations): - if locations is not None: - self._cache_locations = [Path(loc) for loc in ensure_list(locations)] - else: - self._cache_locations = [] - @property def output_dir(self): """Get the filesystem path where outputs will be written.""" - return self._cache_dir / self.checksum - - @property - def cont_dim(self): - # adding inner_cont_dim to the general container_dimension provided by the users - cont_dim_all = deepcopy(self._cont_dim) - for k, v in self._inner_cont_dim.items(): - cont_dim_all[k] = cont_dim_all.get(k, 1) + v - return cont_dim_all - - @cont_dim.setter - def cont_dim(self, cont_dim): - if cont_dim is None: - self._cont_dim = {} - else: - self._cont_dim = cont_dim + return self.cache_dir / self.checksum @property def inputs(self) -> dict[str, ty.Any]: @@ -335,7 +297,7 @@ def inputs(self) -> dict[str, ty.Any]: ) return self._inputs - def _populate_filesystem(self, checksum, output_dir): + def _populate_filesystem(self): """ Invoked immediately after the lockfile is generated, this function: - Creates the cache file @@ -347,45 +309,43 @@ def _populate_filesystem(self, checksum, output_dir): # adding info file with the checksum in case the task was cancelled # and the lockfile has to be removed with open(self.cache_dir / f"{self.uid}_info.json", "w") as jsonfile: - json.dump({"checksum": checksum}, jsonfile) - if not self.can_resume and output_dir.exists(): - shutil.rmtree(output_dir) - output_dir.mkdir(parents=False, exist_ok=self.can_resume) + json.dump({"checksum": self.checksum}, jsonfile) + if not self.can_resume and self.output_dir.exists(): + shutil.rmtree(self.output_dir) + self.output_dir.mkdir(parents=False, exist_ok=self.can_resume) - async def run(self, submitter: "Submitter"): - checksum = self.checksum - output_dir = self.output_dir - lockfile = self.cache_dir / (checksum + ".lock") + def run(self, rerun: bool = False): self.hooks.pre_run(self) - logger.debug("'%s' is attempting to acquire lock on %s", self.name, lockfile) - async with PydraFileLock(lockfile): - if not (submitter.rerun): + logger.debug( + "'%s' is attempting to acquire lock on %s", self.name, self.lockfile + ) + with SoftFileLock(self.lockfile): + if not (rerun): result = self.result() if result is not None and not result.errored: return result cwd = os.getcwd() - self._populate_filesystem(checksum, output_dir) - result = Result(output=None, runtime=None, errored=False) + self._populate_filesystem() + os.chdir(self.output_dir) + result = Result(outputs=None, runtime=None, errored=False, task=self) self.hooks.pre_run_task(self) - self.audit.start_audit(odir=output_dir) + self.audit.start_audit(odir=self.output_dir) + if self.audit.audit_check(AuditFlag.PROV): + self.audit.audit_task(task=self) try: self.audit.monitor() - if inspect.iscoroutinefunction(self._run_task): - await self.definition._run(self, submitter) - else: - self.definition._run(self, submitter) - result.output = self.definition.Outputs.from_task(self) + run_outputs 
= self.definition._run(self) + result.outputs = self.definition.Outputs.from_task(self, run_outputs) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) - record_error(output_dir, error=traceback) + record_error(self.output_dir, error=traceback) result.errored = True - self._errored = True raise finally: self.hooks.post_run_task(self, result) self.audit.finalize_audit(result=result) - save(output_dir, result=result, task=self) + save(self.output_dir, result=result, task=self) # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() os.chdir(cwd) @@ -397,7 +357,7 @@ async def run(self, submitter: "Submitter"): def pickle_task(self): """Pickling the tasks with full inputs""" - pkl_files = self.submitter.cache_dir / "pkl_files" + pkl_files = self.cache_dir / "pkl_files" pkl_files.mkdir(exist_ok=True, parents=True) task_main_path = pkl_files / f"{self.name}_{self.uid}_task.pklz" save(task_path=pkl_files, task=self, name_prefix=f"{self.name}_{self.uid}") @@ -410,33 +370,12 @@ def done(self): if has_lazy(self.definition): return False _result = self.result() - if self.state: - # TODO: only check for needed state result - if _result and all(_result): - if self.state.combiner and isinstance(_result[0], list): - for res_l in _result: - if any([res.errored for res in res_l]): - raise ValueError(f"Task {self.name} raised an error") - return True - else: - if any([res.errored for res in _result]): - raise ValueError(f"Task {self.name} raised an error") - return True - # checking if self.result() is not an empty list only because - # the states_ind is an empty list (input field might be an empty list) - elif ( - _result == [] - and hasattr(self.state, "states_ind") - and self.state.states_ind == [] - ): + if _result: + if _result.errored: + self._errored = True + raise ValueError(f"Task {self.name} raised an error") + else: return True - else: - if _result: - if _result.errored: - self._errored = True - raise ValueError(f"Task {self.name} raised an error") - else: - return True return False def _combined_output(self, return_inputs=False): @@ -479,7 +418,7 @@ def result(self, state_index=None, return_inputs=False): # TODO: check if result is available in load_result and # return a future if not if self.errored: - return Result(output=None, runtime=None, errored=True) + return Result(outputs=None, runtime=None, errored=True, task=self) if state_index is not None: raise ValueError("Task does not have a state") @@ -544,6 +483,58 @@ def _check_for_hash_changes(self): DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any +class WorkflowTask(Task): + + def __init__( + self, + definition: DefType, + submitter: "Submitter", + name: str, + state_index: "state.StateIndex | None" = None, + ): + super().__init__(definition, submitter, name, state_index) + self.submitter = submitter + + async def run(self, rerun: bool = False): + self.hooks.pre_run(self) + logger.debug( + "'%s' is attempting to acquire lock on %s", self.name, self.lockfile + ) + async with PydraFileLock(self.lockfile): + if not rerun: + result = self.result() + if result is not None and not result.errored: + return result + cwd = os.getcwd() + self._populate_filesystem() + result = Result(outputs=None, runtime=None, errored=False, task=self) + self.hooks.pre_run_task(self) + self.audit.start_audit(odir=self.output_dir) + try: + self.audit.monitor() + await self.submitter.expand_workflow(self) + result.outputs = self.definition.Outputs.from_task(self) + except 
Exception: + etype, eval, etr = sys.exc_info() + traceback = format_exception(etype, eval, etr) + record_error(self.output_dir, error=traceback) + result.errored = True + self._errored = True + raise + finally: + self.hooks.post_run_task(self, result) + self.audit.finalize_audit(result=result) + save(self.output_dir, result=result, task=self) + # removing the additional file with the checksum + (self.cache_dir / f"{self.uid}_info.json").unlink() + os.chdir(cwd) + self.hooks.post_run(self, result) + # Check for any changes to the input hashes that have occurred during the execution + # of the task + self._check_for_hash_changes() + return result + + logger = logging.getLogger("pydra") OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) @@ -631,7 +622,7 @@ def construct( output_lazy_fields = constructor(**input_values) # Check to see whether any mandatory inputs are not set for node in wf.nodes: - node._spec._check_rules() + node._definition._check_rules() # Check that the outputs are set correctly, either directly by the constructor # or via returned values that can be zipped with the output names if output_lazy_fields: @@ -719,6 +710,8 @@ def under_construction(cls) -> "Workflow[ty.Any]": _constructed: dict[int, "Workflow[ty.Any]"] = {} def execution_graph(self, submitter: "Submitter") -> DiGraph: + from pydra.engine.submitter import NodeExecution + return self._create_graph([NodeExecution(n, submitter) for n in self.nodes]) @property @@ -746,7 +739,7 @@ def _create_graph( DiGraph The graph of the workflow """ - graph: DiGraph = attrs.field(factory=DiGraph) + graph: DiGraph = DiGraph() for node in nodes: graph.add_nodes(node) # TODO: create connection is run twice @@ -812,6 +805,7 @@ def _create_graph( other_states=other_states, combiner=combiner, ) + return graph def create_dotfile(self, type="simple", export=None, name=None, output_dir=None): """creating a graph - dotfile and optionally exporting to other formats""" @@ -861,8 +855,10 @@ def is_task(obj): def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" from pydra.engine.specs import WorkflowDef + from pydra.engine.core import Workflow + from pydra.engine.core import WorkflowTask - return isinstance(obj, WorkflowDef) + return isinstance(obj, (WorkflowDef, WorkflowTask, Workflow)) def has_lazy(obj): diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 80193c87db..1481e14f7b 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -17,7 +17,7 @@ class Environment: def setup(self): pass - def execute(self, task: "ShellTask"): + def execute(self, task: "ShellTask") -> dict[str, ty.Any]: """ Execute the task in the environment. @@ -28,7 +28,7 @@ def execute(self, task: "ShellTask"): Returns ------- - output + output: dict[str, Any] Output of the task. """ raise NotImplementedError @@ -42,9 +42,9 @@ class Native(Environment): Native environment, i.e. the tasks are executed in the current python environment. 
""" - def execute(self, task: "ShellTask"): + def execute(self, task: "ShellTask") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] - values = execute(task.command_args(), strip=task.strip) + values = execute(task.definition._command_args(), strip=task.strip) output = dict(zip(keys, values)) if output["return_code"]: msg = f"Error running '{task.name}' task with {task.command_args()}:" @@ -90,10 +90,10 @@ def bind(self, loc, mode="ro"): class Docker(Container): """Docker environment.""" - def execute(self, task: "ShellTask"): + def execute(self, task: "ShellTask") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.get_bindings(root=self.root) + mounts = task.definition._get_bindings(root=self.root) docker_args = [ "docker", @@ -111,7 +111,7 @@ def execute(self, task: "ShellTask"): keys = ["return_code", "stdout", "stderr"] values = execute( - docker_args + [docker_img] + task.command_args(root=self.root), + docker_args + [docker_img] + task.definition._command_args(root=self.root), strip=task.strip, ) output = dict(zip(keys, values)) @@ -126,10 +126,10 @@ def execute(self, task: "ShellTask"): class Singularity(Container): """Singularity environment.""" - def execute(self, task: "ShellTask"): + def execute(self, task: "ShellTask") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.get_bindings(root=self.root) + mounts = task.definition._get_bindings(root=self.root) # todo adding xargsy etc singularity_args = [ @@ -148,7 +148,9 @@ def execute(self, task: "ShellTask"): keys = ["return_code", "stdout", "stderr"] values = execute( - singularity_args + [singularity_img] + task.command_args(root=self.root), + singularity_args + + [singularity_img] + + task.definition._command_args(root=self.root), strip=task.strip, ) output = dict(zip(keys, values)) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 2a6e7ec0d2..8ce9c72093 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -18,7 +18,8 @@ from fileformats.core import FileSet if ty.TYPE_CHECKING: - from .specs import TaskDef + from .specs import TaskDef, Result + from .core import Task from pydra.design.base import Field @@ -128,7 +129,12 @@ def load_result(checksum, cache_locations): return None -def save(task_path: Path, result=None, task=None, name_prefix=None) -> None: +def save( + task_path: Path, + result: "Result | None" = None, + task: "Task | None" = None, + name_prefix: str = None, +) -> None: """ Save a :class:`~pydra.engine.core.TaskBase` object and/or results. 
@@ -154,7 +160,7 @@ def save(task_path: Path, result=None, task=None, name_prefix=None) -> None: lockfile = task_path.parent / (task_path.name + "_save.lock") with SoftFileLock(lockfile): if result: - if task_path.name.startswith("Workflow") and result.output is not None: + if task_path.name.startswith("Workflow") and result.outputs is not None: # copy files to the workflow directory result = copyfile_workflow(wf_path=task_path, result=result) with (task_path / f"{name_prefix}_result.pklz").open("wb") as fp: @@ -168,12 +174,12 @@ def copyfile_workflow(wf_path: os.PathLike, result): """if file in the wf results, the file will be copied to the workflow directory""" from .helpers_file import copy_nested_files - for field in attrs_fields(result.output): - value = getattr(result.output, field.name) + for field in attrs_fields(result.outputs): + value = getattr(result.outputs, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run # to move all files and directories to the workflow directory new_value = copy_nested_files(value, wf_path, mode=FileSet.CopyMode.hardlink) - setattr(result.output, field.name, new_value) + setattr(result.outputs, field.name, new_value) return result @@ -424,16 +430,26 @@ def get_available_cpus(): return os.cpu_count() -def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): +def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: """ loading a task from a pickle file, settings proper input and running the task + + Parameters + ---------- + task_pkl : :obj:`Path` + The path to pickled task file + + Returns + ------- + resultfile : :obj:`Path` + The path to the pickled result file """ from .specs import Result try: - task = load_task(task_pkl=task_pkl) + task: Task = load_task(task_pkl=task_pkl) except Exception: if task_pkl.parent.exists(): etype, eval, etr = sys.exc_info() @@ -445,7 +461,7 @@ def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): resultfile = task.output_dir / "_result.pklz" try: - task(rerun=rerun, plugin=plugin, submitter=submitter, **kwargs) + task(rerun=rerun) except Exception as e: # creating result and error files if missing errorfile = task.output_dir / "_error.pklz" @@ -461,13 +477,13 @@ def load_and_run(task_pkl, rerun=False, submitter=None, plugin=None, **kwargs): return resultfile -async def load_and_run_async(task_pkl, submitter=None, rerun=False, **kwargs): +async def load_and_run_async(task_pkl): """ loading a task from a pickle file, settings proper input and running the workflow """ task = load_task(task_pkl=task_pkl) - await task._run(submitter=submitter, rerun=rerun, **kwargs) + await task() def load_task(task_pkl): diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 98c168c394..bfe484fb45 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -216,9 +216,6 @@ def _checksum_states(self, state_index=None): return checksum_ind else: checksum_list = [] - if not hasattr(self.state, "inputs_ind"): - self.state.prepare_states(self._definition, cont_dim=self.cont_dim) - self.state.prepare_inputs() for ind in range(len(self.state.inputs_ind)): checksum_list.append(self._checksum_states(state_index=ind)) return checksum_list @@ -269,6 +266,7 @@ def _set_state( else: self._state = State( self.name, + self._definition, splitter=splitter, other_states=other_states, combiner=combiner, @@ -295,16 +293,16 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: def _extract_input_el(self, inputs, inp_nm, ind): """ 
Extracting element of the inputs taking into account - container dimension of the specific element that can be set in self.cont_dim. + container dimension of the specific element that can be set in self.state.cont_dim. If input name is not in cont_dim, it is assumed that the input values has a container dimension of 1, so only the most outer dim will be used for splitting. If """ - if f"{self.name}.{inp_nm}" in self.cont_dim: + if f"{self.name}.{inp_nm}" in self.state.cont_dim: return list( hlpst.flatten( ensure_list(getattr(inputs, inp_nm)), - max_depth=self.cont_dim[f"{self.name}.{inp_nm}"], + max_depth=self.state.cont_dim[f"{self.name}.{inp_nm}"], ) )[ind] else: @@ -326,7 +324,7 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: ind=input_ind[f"{self.name}.{inp}"], ) split_defs[StateIndex(input_ind)] = attrs.evolve( - self._definition, inputs_dict + self._definition, **inputs_dict ) return split_defs diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index dc19aca68f..3d1b1d0688 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -3,6 +3,7 @@ from pathlib import Path import re from copy import copy +import os import inspect import itertools import platform @@ -14,7 +15,8 @@ from typing_extensions import Self import attrs import cloudpickle as cp -from pydra.engine.audit import AuditFlag +from fileformats.generic import FileSet +from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils.typing import TypeParser from .helpers import ( attrs_fields, @@ -25,7 +27,7 @@ ensure_list, parse_format_string, ) -from .helpers_file import template_update +from .helpers_file import template_update, template_update_single from . import helpers_state as hlpst from . import lazy from pydra.utils.hash import hash_function, Cache @@ -37,7 +39,8 @@ from pydra.engine.core import Task from pydra.engine.task import ShellTask from pydra.engine.core import Workflow - from pydra.engine.submitter import Submitter + from pydra.engine.environments import Environment + from pydra.engine.workers import Worker def is_set(value: ty.Any) -> bool: @@ -118,12 +121,93 @@ class TaskDef(ty.Generic[OutputsType]): Task: "ty.Type[core.Task]" # The following fields are used to store split/combine state information - _splitter = attrs.field(default=None, init=False) - _combiner = attrs.field(default=None, init=False) - _cont_dim = attrs.field(default=None, init=False) + _splitter = attrs.field(default=None, init=False, repr=False) + _combiner = attrs.field(default=None, init=False, repr=False) + _cont_dim = attrs.field(default=None, init=False, repr=False) + _hashes = attrs.field(default=None, init=False, eq=False, repr=False) RESERVED_FIELD_NAMES = ("split", "combine") + def __call__( + self, + cache_dir: os.PathLike | None = None, + worker: "str | ty.Type[Worker] | Worker" = "cf", + environment: "Environment | None" = None, + rerun: bool = False, + cache_locations: ty.Iterable[os.PathLike] | None = None, + audit_flags: AuditFlag = AuditFlag.NONE, + messengers: ty.Iterable[Messenger] | None = None, + messenger_args: dict[str, ty.Any] | None = None, + **kwargs: ty.Any, + ) -> OutputsType: + """Create a task from this definition and execute it to produce a result. 
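As context for the node.py hunk above, _split_definition now unpacks the updated inputs as keyword arguments (attrs.evolve(self._definition, **inputs_dict)); attrs.evolve does not accept a positional dict of changes, which is what the previous call passed. A small self-contained check, assuming only the attrs package:

    import attrs

    @attrs.define
    class Defn:
        x: int = 0
        y: int = 0

    updates = {"x": 1}
    # attrs.evolve(Defn(), updates) raises TypeError: changes must be keyword arguments
    print(attrs.evolve(Defn(), **updates))   # Defn(x=1, y=0)
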
+ + Parameters + ---------- + cache_dir : os.PathLike, optional + Cache directory where the working directory/results for the task will be + stored, by default None + worker : str or Worker, optional + The worker to use, by default "cf" + environment: Environment, optional + The execution environment to use, by default None + rerun : bool, optional + Whether to force the re-computation of the task results even if existing + results are found, by default False + cache_locations : list[os.PathLike], optional + Alternate cache locations to check for pre-computed results, by default None + audit_flags : AuditFlag, optional + Auditing configuration, by default AuditFlag.NONE + messengers : list, optional + Messengers, by default None + messenger_args : dict, optional + Messenger arguments, by default None + **kwargs : dict + Keyword arguments to pass on to the worker initialisation + + Returns + ------- + OutputsType or list[OutputsType] + The output interface of the task, or in the case of split tasks, a list of + output interfaces + """ + from pydra.engine.submitter import ( # noqa: F811 + Submitter, + WORKER_KWARG_FAIL_NOTE, + ) + + try: + with Submitter( + audit_flags=audit_flags, + cache_dir=cache_dir, + cache_locations=cache_locations, + messenger_args=messenger_args, + messengers=messengers, + rerun=rerun, + environment=environment, + worker=worker, + **kwargs, + ) as sub: + result = sub(self) + except TypeError as e: + if hasattr(e, "__notes__") and WORKER_KWARG_FAIL_NOTE in e.__notes__: + if match := re.match( + r".*got an unexpected keyword argument '(\w+)'", str(e) + ): + if match.group(1) in self: + e.add_note( + f"Note that the unrecognised argument, {match.group(1)!r}, is " + f"an input of the task definition {self!r} that has already been " + f"parameterised (it is being called to execute it)" + ) + raise + if result.errored: + raise RuntimeError( + f"Task {self} failed @ {result.errors['time of crash']} with following errors:\n" + + "\n".join(result.errors["error message"]) + ) + return result.outputs + def split( self, splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, @@ -148,7 +232,7 @@ def split( If input name is not in cont_dim, it is assumed that the input values has a container dimension of 1, so only the most outer dim will be used for splitting. 
**inputs - fields to split over, will automatically be wrapped in a StateArray object + fields to split over, will be automatically wrapped in a StateArray object and passed to the node inputs Returns @@ -188,8 +272,7 @@ def split( f"Container dimension for {field_name} is provided but the field " f"is not present in the inputs" ) - self._splitter = splitter - self._cont_dim = cont_dim + split_inputs = {} for name, value in inputs.items(): if isinstance(value, lazy.LazyField): split_val = value.split(splitter) @@ -199,8 +282,11 @@ def split( split_val = StateArray(value) else: raise TypeError(f"Could not split {value} as it is not a sequence type") - setattr(self, name, split_val) - return self + split_inputs[name] = split_val + split_def = attrs.evolve(self, **split_inputs) + split_def._splitter = splitter + split_def._cont_dim = cont_dim + return split_def def combine( self, @@ -238,86 +324,9 @@ def combine( raise ValueError( f"Combiner fields {unrecognised} are not present in the task definition" ) - self._combiner = combiner - return self - - def __call__( - self, - name: str | None = None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - cache_locations=None, - messengers=None, - messenger_args=None, - rerun=False, - **exec_kwargs, - ) -> OutputsType: - """Create a task from this definition and execute it to produce a result. - - Parameters - ---------- - name : str, optional - The name of the task, by default None - audit_flags : AuditFlag, optional - Auditing configuration, by default AuditFlag.NONE - cache_dir : os.PathLike, optional - Cache directory where the working directory/results for the task will be - stored, by default None - cache_locations : list[os.PathLike], optional - Alternate cache locations to check for pre-computed results, by default None - messenger_args : dict, optional - Messenger arguments, by default None - messengers : list, optional - Messengers, by default None - rerun : bool, optional - Whether to force the re-computation of the task results even if existing - results are found, by default False - exec_kwargs : dict - Keyword arguments to pass on to the Submitter object used to execute the task - - Returns - ------- - OutputsType or list[OutputsType] - The output interface of the task, or in the case of split tasks, a list of - output interfaces - """ - from pydra.engine.submitter import Submitter - - self._check_rules() - if self._splitter: - # Create an implicit workflow to hold the split nodes - from pydra.design import workflow - - outputs = {o.name: list[o.type] for o in list_fields(self.Outputs)} - - @workflow.define(outputs=outputs) - def Split(): - node = workflow.add(self) - return tuple(getattr(node, o) for o in outputs) - - definition = Split() - - elif self._combiner: - raise ValueError( - f"Task {self} is marked for combining, but not splitting. " - "Use the `split` method to split the task before combining." 
- ) - else: - definition = self - - with Submitter( - audit_flags=audit_flags, - cache_dir=cache_dir, - cache_locations=cache_locations, - messenger_args=messenger_args, - messengers=messengers, - rerun=rerun, - **exec_kwargs, - ) as sub: - result = sub(definition) - if result.errored: - raise ValueError(f"Task {definition} failed with an error") - return result.output + combined_def = copy(self) + combined_def._combiner = combiner + return combined_def def __iter__(self) -> ty.Generator[str, None, None]: """Iterate through all the names in the definition""" @@ -359,7 +368,7 @@ def _hash_changes(self): def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: """Compute a basic hash for any given set of fields.""" inp_dict = {} - for field in attrs_fields(self): + for field in list_fields(self): if isinstance(field, Out): continue # Skip output fields # removing values that are not set from hash calculation @@ -486,19 +495,20 @@ class Runtime: class Result(ty.Generic[OutputsType]): """Metadata regarding the outputs of processing.""" - output: OutputsType | None = None + task: "Task" + outputs: OutputsType | None = None runtime: Runtime | None = None errored: bool = False def __getstate__(self): state = attrs_values(self) - if state["output"] is not None: - state["output"] = cp.dumps(state["output"]) + if state["outputs"] is not None: + state["outputs"] = cp.dumps(state["outputs"]) return state def __setstate__(self, state): - if state["output"] is not None: - state["output"] = cp.loads(state["output"]) + if state["outputs"] is not None: + state["outputs"] = cp.loads(state["outputs"]) for name, val in state.items(): setattr(self, name, val) @@ -511,9 +521,16 @@ def get_output_field(self, field_name): Name of field in LazyField object """ if field_name == "all_": - return attrs_values(self.output) + return attrs_values(self.outputs) else: - return getattr(self.output, field_name) + return getattr(self.outputs, field_name) + + @property + def errors(self): + if self.errored: + with open(self.task.output_dir / "_error.pklz", "rb") as f: + return cp.load(f) + return None @attrs.define(kw_only=True) @@ -616,9 +633,6 @@ def construct(self) -> "Workflow": self._constructed = Workflow.construct(self) return self._constructed - async def _run(self, task: "Task", submitter: "Submitter") -> Result: - await submitter.expand_workflow(task) - RETURN_CODE_HELP = """The process' exit code.""" STDOUT_HELP = """The standard output stream produced by the command.""" @@ -674,7 +688,7 @@ def from_task( elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) else: - resolved_value = task.resolve_value(fld, outputs.stdout, outputs.stderr) + resolved_value = cls._resolve_value(fld, outputs.stdout, outputs.stderr) # Set the resolved value setattr(outputs, fld.name, resolved_value) return outputs @@ -730,6 +744,61 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: # Check to see if any of the requirement sets are satisfied return any(rs.satisfied(inputs) for rs in requirements) + @classmethod + def _resolve_value( + cls, + fld: "shell.out", + task: "Task", + outputs: dict[str, ty.Any], + ) -> ty.Any: + """Collect output file if metadata specified.""" + from pydra.design import shell + + if not cls._required_fields_satisfied(fld, task.definition): + return None + elif isinstance(fld, shell.outarg) and fld.path_template: + return template_update_single( + fld, + definition=task.definition, + output_dir=task.output_dir, + spec_type="output", 
+ ) + elif fld.callable: + callable_ = fld.callable + if isinstance(fld.callable, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. + callable_ = fld.callable.__func__ + call_args = inspect.getfullargspec(callable_) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = fld + elif argnm == "output_dir": + call_args_val[argnm] = task.output_dir + elif argnm == "inputs": + call_args_val[argnm] = task.inputs + elif argnm == "stdout": + call_args_val[argnm] = outputs["stdout"] + elif argnm == "stderr": + call_args_val[argnm] = outputs["stderr"] + else: + try: + call_args_val[argnm] = task.inputs[argnm] + except KeyError as e: + e.add_note( + f"arguments of the callable function from {fld.name} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm} is used" + ) + raise + return callable_(**call_args_val) + else: + raise Exception( + f"Metadata for '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld}.' + ) + ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) @@ -738,6 +807,10 @@ class ShellDef(TaskDef[ShellOutputsType]): RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) + def _run(self, environment: "Environment") -> None: + """Run the shell command.""" + return environment.execute(self) + @property def cmdline(self) -> str: """The equivalent command line that would be submitted if the task were run on @@ -941,6 +1014,74 @@ def _command_pos_args( cmd_add += split_cmd(cmd_el_str) return field.position, cmd_add + def _get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: + """Return bindings necessary to run task in an alternative root. + + This is primarily intended for contexts when a task is going + to be run in a container with mounted volumes. + + Arguments + --------- + root: str + + Returns + ------- + bindings: dict + Mapping from paths in the host environment to the target environment + """ + + if root is None: + return {} + else: + self._prepare_bindings(root=root) + return self.bindings + + def _prepare_bindings(self, root: str): + """Prepare input files to be passed to the task + + This updates the ``bindings`` attribute of the current task to make files available + in an ``Environment``-defined ``root``. + """ + fld: Arg + for fld in attrs_fields(self): + if TypeParser.contains_type(FileSet, fld.type): + fileset: FileSet = self[fld.name] + if not isinstance(fileset, FileSet): + raise NotImplementedError( + "Generating environment bindings for nested FileSets are not " + "yet supported" + ) + copy = fld.copy_mode == FileSet.CopyMode.copy + + host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") + + # Default to mounting paths as read-only, but respect existing modes + old_mode = self.bindings.get(host_path, ("", "ro"))[1] + self.bindings[host_path] = (env_path, "rw" if copy else old_mode) + + # Provide in-container paths without type-checking + self.inputs_mod_root[fld.name] = tuple( + env_path / rel for rel in fileset.relative_fspaths + ) + + def _generated_output_names(self, stdout: str, stderr: str): + """Returns a list of all outputs that will be generated by the task. + Takes into account the task input and the requires list for the output fields. + TODO: should be in all Output specs? + """ + # checking the input (if all mandatory fields are provided, etc.) 
+ self._check_rules() + output_names = ["return_code", "stdout", "stderr"] + for fld in list_fields(self): + # assuming that field should have either default or metadata, but not both + if is_set(fld.default): + output_names.append(fld.name) + elif is_set(self.Outputs._resolve_output_value(fld, stdout, stderr)): + output_names.append(fld.name) + return output_names + + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent + def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: return None diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 84e131b57b..c97d71a53d 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -5,11 +5,13 @@ from collections import OrderedDict from functools import reduce import typing as ty -import attrs from . import helpers_state as hlpst from .helpers import ensure_list, attrs_values # from .specs import BaseDef +if ty.TYPE_CHECKING: + from .specs import TaskDef + # TODO: move to State op = {".": zip, "*": itertools.product} @@ -35,7 +37,7 @@ def __init__(self, indices: dict[str, int] | None = None): if indices is None: self.indices = OrderedDict() else: - self.indices = OrderedDict(sorted(indices)) + self.indices = OrderedDict(sorted(indices.items())) def __hash__(self): return hash(tuple(self.indices.items())) @@ -112,7 +114,15 @@ class State: """ - def __init__(self, name, splitter=None, combiner=None, other_states=None): + def __init__( + self, + name, + definition: "TaskDef", + splitter=None, + combiner=None, + cont_dim=None, + other_states=None, + ): """ Initialize a state. @@ -130,10 +140,13 @@ def __init__(self, name, splitter=None, combiner=None, other_states=None): """ self.name = name + self.definition = definition self.other_states = other_states self.splitter = splitter # temporary combiner self.combiner = combiner + self.cont_dim = cont_dim or {} + self._inputs_ind = None # if other_states, the connections have to be updated if self.other_states: self.update_connections() @@ -145,6 +158,26 @@ def __str__(self): f"and combiner: {self.combiner}" ) + @property + def names(self): + """Return the names of the states.""" + # analysing states from connected tasks if inner_inputs + previous_states_keys = { + f"_{v.name}": v.keys_final for v in self.inner_inputs.values() + } + names = [] + # iterating splitter_rpn + for token in self.splitter_rpn: + if token in [".", "*"]: # token is one of the input var + continue + # adding variable to the stack + if token.startswith("_"): + new_keys = previous_states_keys[token] + names += new_keys + else: + names.append(token) + return names + @property def depth(self) -> int: """Return the number of uncombined splits of the state, i.e. the number nested @@ -249,6 +282,16 @@ def current_splitter(self): else: return self.splitter + @property + def inputs_ind(self): + """dictionary for every state that contains indices for all task inputs + (i.e. 
inputs that are relevant for current task, can be outputs from previous nodes) + """ + if self._inputs_ind is None: + self.prepare_states() + self.prepare_inputs() + return self._inputs_ind + @current_splitter.setter def current_splitter(self, current_splitter): self._current_splitter = current_splitter @@ -796,7 +839,11 @@ def combiner_validation(self): if set(self._combiner) - set(self.splitter_rpn): raise hlpst.PydraStateError("all combiners have to be in the splitter") - def prepare_states(self, inputs, cont_dim=None): + def prepare_states( + self, + inputs: dict[str, ty.Any] | None = None, + cont_dim: dict[str, int] | None = None, + ): """ Prepare a full list of state indices and state values. @@ -805,28 +852,21 @@ def prepare_states(self, inputs, cont_dim=None): State Values specific elements from inputs that can be used running interfaces - - Parameters - ---------- - inputs : :obj:`dict` - inputs of the task - cont_dim : :obj:`dict` or `None` - container's dimensions for a specific input's fields """ # checking if splitter and combiner have valid forms self.splitter_validation() self.combiner_validation() self.set_input_groups() # container dimension for each input, specifies how nested the input is - if cont_dim: - self.cont_dim = cont_dim - else: - self.cont_dim = {} - if attrs.has(inputs): - self.inputs = attrs_values(inputs) - else: - self.inputs = inputs + if inputs is None: + inputs = { + f"{self.name}.{n}": v for n, v in attrs_values(self.definition).items() + } + self.inputs = inputs + if not self.cont_dim: + self.cont_dim = cont_dim or {} if self.other_states: + st: State for nm, (st, _) in self.other_states.items(): # I think now this if is never used if not hasattr(st, "states_ind"): @@ -951,7 +991,7 @@ def prepare_inputs(self): """ if not self.other_states: - self.inputs_ind = self.states_ind + self._inputs_ind = self.states_ind else: # elements from the current node (the current part of the splitter) if self.current_splitter_rpn: @@ -1002,11 +1042,11 @@ def prepare_inputs(self): inputs_ind = [] # iter_splits using inputs from current state/node - self.inputs_ind = list(hlpst.iter_splits(inputs_ind, keys_inp)) + self._inputs_ind = list(hlpst.iter_splits(inputs_ind, keys_inp)) # removing elements that are connected to inner splitter # TODO - add tests to test_workflow.py (not sure if we want to remove it) for el in connected_to_inner: - [dict.pop(el) for dict in self.inputs_ind] + [dict.pop(el) for dict in self._inputs_ind] def splits(self, splitter_rpn): """ @@ -1150,20 +1190,3 @@ def _single_op_splits(self, op_single): val = op["*"](val_ind) keys = [op_single] return val, keys - - # def split(self, task_def: TaskDef[OutputsType]) -> list["TaskDef[OutputsType]"]: - # """ - # Split the task definition containing state-array fields into multiple tasks - # without splitters and non-state-array values. 
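The StateIndex change earlier in this hunk (sorted(indices.items()) instead of sorted(indices)) matters because iterating a dict yields only its keys, so the old expression could not rebuild the index as key/value pairs. A quick standalone check:

    from collections import OrderedDict

    indices = {"b": 1, "a": 0}
    # sorted(indices) == ["a", "b"], which OrderedDict cannot interpret as (key, value) pairs,
    # so the pre-fix expression failed to build the mapping correctly.
    print(OrderedDict(sorted(indices.items())))   # keys now ordered: a -> 0, b -> 1
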
- - # Parameters - # ---------- - # task_def: TaskDef - # a task definition - - # Returns - # ------- - # List[TaskDef] - # a list of task definitions - # """ - # return hlpst.map_splits(self.states_ind, task_def, cont_dim=task_def._cont_dim) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 74a3726b47..a7a6e9e151 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -11,12 +11,16 @@ from .workers import Worker, WORKERS from .core import is_workflow from .graph import DiGraph -from .helpers import get_open_loop, load_and_run_async +from .helpers import ( + get_open_loop, + list_fields, +) from pydra.utils.hash import PersistentCache from .state import StateIndex from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger +from pydra.utils import user_cache_dir import logging @@ -30,34 +34,48 @@ # Used to flag development mode of Audit develop = False +WORKER_KWARG_FAIL_NOTE = "Attempting to instantiate worker submitter" + class Submitter: - """Send a task to the execution backend.""" + """Send a task to the execution backend. + + Parameters + ---------- + cache_dir : os.PathLike, optional + Cache directory where the working directory/results for the task will be + stored, by default None + worker : str or Worker, optional + The worker to use, by default "cf" + environment: Environment, optional + The execution environment to use, by default None + rerun : bool, optional + Whether to force the re-computation of the task results even if existing + results are found, by default False + cache_locations : list[os.PathLike], optional + Alternate cache locations to check for pre-computed results, by default None + audit_flags : AuditFlag, optional + Auditing configuration, by default AuditFlag.NONE + messengers : list, optional + Messengers, by default None + messenger_args : dict, optional + Messenger arguments, by default None + **kwargs : dict + Keyword arguments to pass on to the worker initialisation + """ def __init__( self, - worker: ty.Union[str, ty.Type[Worker]] = "cf", cache_dir: os.PathLike | None = None, - cache_locations: list[os.PathLike] | None = None, + worker: ty.Union[str, ty.Type[Worker]] = "cf", environment: "Environment | None" = None, + rerun: bool = False, + cache_locations: list[os.PathLike] | None = None, audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, - rerun: bool = False, **kwargs, ): - """ - Initialize task submission. - - Parameters - ---------- - plugin : :obj:`str` or :obj:`ty.Type[pydra.engine.core.Worker]` - Either the identifier of the execution backend or the worker class itself. - Default is ``cf`` (Concurrent Futures). - **kwargs - Additional keyword arguments to pass to the worker. 
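The reworked Submitter documented above is what the new TaskDef.__call__ delegates to, so direct use looks roughly like the following sketch (my_task stands for any already-parameterised task definition and is not defined in this patch):

    from pydra.engine.submitter import Submitter

    with Submitter(worker="cf") as sub:      # "cf": the concurrent-futures worker
        result = sub(my_task)                # my_task: a parameterised TaskDef (placeholder)
    if not result.errored:
        print(result.outputs)
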
- - """ self.audit = Audit( audit_flags=audit_flags, @@ -65,6 +83,11 @@ def __init__( messenger_args=messenger_args, develop=develop, ) + if cache_dir is None: + cache_dir = user_cache_dir / "run-cache" + cache_dir.mkdir(parents=True, exist_ok=True) + elif not cache_dir.exists(): + raise ValueError(f"Cache directory {str(cache_dir)!r} does not exist") self.cache_dir = cache_dir self.cache_locations = cache_locations self.environment = environment @@ -72,19 +95,31 @@ def __init__( self.loop = get_open_loop() self._own_loop = not self.loop.is_running() if isinstance(worker, str): - self.plugin = worker + self.worker_name = worker try: - worker_cls = WORKERS[self.plugin] + worker_cls = WORKERS[self.worker_name] except KeyError: - raise NotImplementedError(f"No worker for '{self.plugin}' plugin") + raise NotImplementedError(f"No worker for '{self.worker_name}' plugin") else: try: - self.plugin = worker.plugin_name + self.worker_name = worker.plugin_name except AttributeError: raise ValueError("Worker class must have a 'plugin_name' str attribute") worker_cls = worker - self.worker = worker_cls(**kwargs) - self.worker.loop = self.loop + try: + self._worker = worker_cls(**kwargs) + except TypeError as e: + e.add_note(WORKER_KWARG_FAIL_NOTE) + raise + self._worker.loop = self.loop + + @property + def worker(self): + if self._worker is None: + raise RuntimeError( + "Cannot access worker of unpickeld submitter (typically in subprocess)" + ) + return self._worker def __call__( self, @@ -92,44 +127,64 @@ def __call__( ): """Submitter run function.""" - task = Task(task_def, submitter=self, name="task") - self.loop.run_until_complete(self.submit_from_call(task)) - PersistentCache().clean_up() - return task.result() + task_def._check_rules() + # If the outer task is split, create an implicit workflow to hold the split nodes + if task_def._splitter: - async def submit_from_call(self, task: "Task"): - """ - This coroutine should only be called once per Submitter call, - and serves as the bridge between sync/async lands. + from pydra.design import workflow - There are 4 potential paths based on the type of runnable: - 0) Workflow has a different plugin than a submitter - 1) Workflow without State - 2) Task without State - 3) (Workflow or Task) with State + output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} - Once Python 3.10 is the minimum, this should probably be refactored into using - structural pattern matching. - """ - if is_workflow(task): # TODO: env to wf - # connect and calculate the checksum of the graph before running - task._create_graph_connections() # override_task_caches=True) - # 0 - if task.plugin and task.plugin != self.plugin: - # if workflow has a different plugin it's treated as a single element - await self.worker.run(task, rerun=self.rerun) - # 1 - # if runnable.state is None: - # await runnable._run(self, rerun=rerun) - # # 3 - # else: - await self.expand_runnable(task, wait=True) - else: - # 2 - await self.expand_runnable(task, wait=True) # TODO - return True + # We need to use a new variable as task_def will be overwritten by the time + # the Split workflow constructor is called + node_def = task_def + + @workflow.define(outputs=output_types) + def Split(): + node = workflow.add(node_def) + return tuple(getattr(node, o) for o in output_types) + + task_def = Split() - async def expand_runnable(self, runnable: "Task", wait=False): + elif task_def._combiner: + raise ValueError( + f"Task {self} is marked for combining, but not splitting. 
" + "Use the `split` method to split the task before combining." + ) + task = task_def.Task(task_def, submitter=self, name="task") + print(str(task.output_dir)) + self.loop.run_until_complete(self.expand_runnable(task)) + PersistentCache().clean_up() + print(str(task.output_dir)) + result = task.result() + if result is None: + if task.lockfile.exists(): + raise RuntimeError( + f"Task {task} has a lockfile, but no result was found. " + "This may be due to another submission process running, or the hard " + "interrupt (e.g. a debugging abortion) interrupting a previous run. " + f"In the case of an interrupted run, please remove {str(task.lockfile)!r} " + "and resubmit." + ) + raise RuntimeError(f"Task {task} has no result in {str(task.output_dir)!r}") + return result + + # def __getstate__(self): + # state = self.__dict__.copy() + # # Remove the unpicklable entries or those that should not be pickled + # # When unpickled (in another process) the submitter can't be called + # state["loop"] = None + # state["_worker"] = None + # return state + + # def __setstate__(self, state): + # self.__dict__.update(state) + # # Restore the loop and worker + # self.loop = get_open_loop() + # self.worker = WORKERS[self.plugin](**self.worker.__dict__) + # self.worker.loop = self.loop + + async def expand_runnable(self, task: "Task", wait=False): """ This coroutine handles state expansion. @@ -152,21 +207,13 @@ async def expand_runnable(self, runnable: "Task", wait=False): Coroutines for :class:`~pydra.engine.core.TaskBase` execution. """ - if runnable.plugin and runnable.plugin != self.plugin: - raise NotImplementedError() - futures = set() - task_pkl = await prepare_runnable(runnable) - - if is_workflow(runnable): - # job has no state anymore - futures.add( - # This unpickles and runs workflow - why are we pickling? - asyncio.create_task(load_and_run_async(task_pkl, self, self.rerun)) - ) + if is_workflow(task): + futures.add(asyncio.create_task(task.run(self.rerun))) else: - futures.add(self.worker.run((task_pkl, runnable), rerun=self.rerun)) + task_pkl = await prepare_runnable(task) + futures.add(self.worker.run((task_pkl, task), rerun=self.rerun)) if wait and futures: # if wait is True, we are at the end of the graph / state expansion. 
@@ -265,11 +312,7 @@ async def expand_workflow(self, task: "Task[WorkflowDef]"): logger.debug(f"Retrieving inputs for {task}") # TODO: add state idx to retrieve values to reduce waiting task.definition._retrieve_values(wf) - if task.state: - for fut in await self.expand_runnable(task): - task_futures.add(fut) - # expand that workflow - elif is_workflow(task): + if is_workflow(task): await task.run(self) # single task else: @@ -326,7 +369,7 @@ def get_runnable_tasks( # Record if the node has not been started if not node.started: not_started.add(node) - tasks.extend(node.get_runnable_tasks(graph, self)) + tasks.extend(node.get_runnable_tasks(graph)) return tasks @property @@ -371,12 +414,12 @@ def __init__(self, node: "Node", submitter: Submitter): self.submitter = submitter # Initialize the state dictionaries self._tasks = None - self.waiting = [] - self.successful = [] - self.errored = [] - self.running = [] + self.waiting = {} + self.successful = {} + self.errored = {} + self.running = {} self.unrunnable = defaultdict(list) - self.state_names = self.node.state_names + self.state_names = self.node.state.names def __getattr__(self, name: str) -> ty.Any: """Delegate attribute access to the underlying node.""" @@ -422,14 +465,14 @@ def all_failed(self) -> bool: def _generate_tasks(self) -> ty.Iterable["Task"]: if self.node.state is None: - yield Task( + yield self.node._definition.Task( definition=self.node._definition, submitter=self.submitter, name=self.node.name, ) else: for index, split_defn in self.node._split_definition().items(): - yield Task( + yield self.node._definition.Task( definition=split_defn, submitter=self.submitter, name=self.node.name, @@ -487,16 +530,17 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task"]: List of tasks that are ready to run """ runnable: list["Task"] = [] + self.tasks # Ensure tasks are loaded if not self.started: self.waiting = copy(self._tasks) # Check to see if any previously running tasks have completed - for index, task in copy(self.running.items()): + for index, task in list(self.running.items()): if task.done: self.successful[task.state_index] = self.running.pop(index) elif task.errored: self.errored[task.state_index] = self.running.pop(index) # Check to see if any waiting tasks are now runnable/unrunnable - for index, task in copy(self.waiting.items()): + for index, task in list(self.waiting.items()): pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: diff --git a/pydra/engine/task.py b/pydra/engine/task.py index bfea780803..f6679747d9 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -42,30 +42,21 @@ from __future__ import annotations import attrs -import typing as ty import json from pathlib import Path -import inspect -from fileformats.core import FileSet from .core import Task from pydra.utils.messenger import AuditFlag from .specs import ( PythonDef, ShellDef, - is_set, attrs_fields, ) from .helpers import ( attrs_values, - list_fields, ) -from pydra.engine.helpers_file import is_local_file, template_update_single -from pydra.utils.typing import TypeParser +from pydra.engine.helpers_file import is_local_file from .environments import Native -if ty.TYPE_CHECKING: - from pydra.design import shell - class PythonTask(Task): """Wrap a Python callable as a task element.""" @@ -157,132 +148,6 @@ def __init__( self.bindings = {} self.inputs_mod_root = {} - def get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: - """Return bindings necessary to run task in an 
alternative root. - - This is primarily intended for contexts when a task is going - to be run in a container with mounted volumes. - - Arguments - --------- - root: str - - Returns - ------- - bindings: dict - Mapping from paths in the host environment to the target environment - """ - - if root is None: - return {} - else: - self._prepare_bindings(root=root) - return self.bindings - - def command_args(self, root: Path | None = None) -> list[str]: - return self.definition._command_args( - input_updates=self.inputs_mod_root, root=root - ) - - def _run_task(self, environment=None): - if environment is None: - environment = self.environment - self.output_ = environment.execute(self) - - def _prepare_bindings(self, root: str): - """Prepare input files to be passed to the task - - This updates the ``bindings`` attribute of the current task to make files available - in an ``Environment``-defined ``root``. - """ - for fld in attrs_fields(self.definition): - if TypeParser.contains_type(FileSet, fld.type): - fileset = getattr(self.definition, fld.name) - copy = fld.copy_mode == FileSet.CopyMode.copy - - host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") - - # Default to mounting paths as read-only, but respect existing modes - old_mode = self.bindings.get(host_path, ("", "ro"))[1] - self.bindings[host_path] = (env_path, "rw" if copy else old_mode) - - # Provide in-container paths without type-checking - self.inputs_mod_root[fld.name] = tuple( - env_path / rel for rel in fileset.relative_fspaths - ) - - def resolve_output_value( - self, - fld: "shell.out", - stdout: str, - stderr: str, - ) -> ty.Any: - """Collect output file if metadata specified.""" - from pydra.design import shell - - if not self.definition.Outputs._required_fields_satisfied(fld, self.definition): - return None - elif isinstance(fld, shell.outarg) and fld.path_template: - return template_update_single( - fld, - definition=self.definition, - output_dir=self.output_dir, - spec_type="output", - ) - elif fld.callable: - callable_ = fld.callable - if isinstance(fld.callable, staticmethod): - # In case callable is defined as a static method, - # retrieve the function wrapped in the descriptor. - callable_ = fld.callable.__func__ - call_args = inspect.getfullargspec(callable_) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = fld - elif argnm == "output_dir": - call_args_val[argnm] = self.output_dir - elif argnm == "inputs": - call_args_val[argnm] = self.inputs - elif argnm == "stdout": - call_args_val[argnm] = stdout - elif argnm == "stderr": - call_args_val[argnm] = stderr - else: - try: - call_args_val[argnm] = self.inputs[argnm] - except KeyError as e: - e.add_note( - f"arguments of the callable function from {fld.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - raise - return callable_(**call_args_val) - else: - raise Exception( - f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld}.' - ) - - def generated_output_names(self, stdout: str, stderr: str): - """Returns a list of all outputs that will be generated by the task. - Takes into account the task input and the requires list for the output fields. - TODO: should be in all Output specs? - """ - # checking the input (if all mandatory fields are provided, etc.) 
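Both the new ShellOutputs._resolve_value and the ShellTask method it replaces (removed above) dispatch to output callables by inspecting the callable's signature and passing only the arguments it declares. The mechanism, stripped of the engine specifics (names and values below are placeholders):

    import inspect

    def gather_output(field, output_dir, stdout):        # a user-supplied output callable
        return f"{field} -> {output_dir} (stdout: {len(stdout)} chars)"

    available = {"field": "out_file", "output_dir": "/tmp/work",
                 "stdout": "listing...", "stderr": ""}
    argnames = inspect.getfullargspec(gather_output).args
    print(gather_output(**{name: available[name] for name in argnames}))
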
- self.definition._check_rules() - output_names = ["return_code", "stdout", "stderr"] - for fld in list_fields(self): - # assuming that field should have either default or metadata, but not both - if is_set(fld.default): - output_names.append(fld.name) - elif is_set(self.resolve_output_value(fld, stdout, stderr)): - output_names.append(fld.name) - return output_names - - DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent - class BoshTask(ShellTask): diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 2d8362c7f2..2c458a494d 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -2813,7 +2813,7 @@ def gather_output(field, output_dir): assert all([file.fspath.exists() for file in res.output.newfile]) assert ( shelly.output_names - == shelly.generated_output_names + == shelly._generated_output_names == ["return_code", "stdout", "stderr", "newfile"] ) @@ -3314,7 +3314,7 @@ def get_lowest_directory(directory_path): ) assert ( shelly.output_names - == shelly.generated_output_names + == shelly._generated_output_names == ["return_code", "stdout", "stderr", "resultsDir"] ) res = results_function(shelly, plugin) @@ -3559,10 +3559,10 @@ def test_shell_cmd_inputspec_outputspec_2(): ) shelly.definition.file1 = "new_file_1.txt" shelly.definition.file2 = "new_file_2.txt" - # all fields from output_spec should be in output_names and generated_output_names + # all fields from output_spec should be in output_names and _generated_output_names assert ( shelly.output_names - == shelly.generated_output_names + == shelly._generated_output_names == ["return_code", "stdout", "stderr", "newfile1", "newfile2"] ) @@ -3625,7 +3625,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): output_spec=my_output_spec, ) shelly.definition.file1 = "new_file_1.txt" - # generated_output_names should know that newfile2 will not be generated + # _generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", "stdout", @@ -3633,7 +3633,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): "newfile1", "newfile2", ] - assert shelly.generated_output_names == [ + assert shelly._generated_output_names == [ "return_code", "stdout", "stderr", @@ -3759,7 +3759,7 @@ def test_shell_cmd_inputspec_outputspec_3a(): ) shelly.definition.file1 = "new_file_1.txt" shelly.definition.file2 = "new_file_2.txt" - # generated_output_names should know that newfile2 will not be generated + # _generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", "stdout", @@ -3767,7 +3767,7 @@ def test_shell_cmd_inputspec_outputspec_3a(): "newfile1", "newfile2", ] - assert shelly.generated_output_names == [ + assert shelly._generated_output_names == [ "return_code", "stdout", "stderr", @@ -3823,10 +3823,10 @@ def test_shell_cmd_inputspec_outputspec_4(): ) shelly.definition.file1 = "new_file_1.txt" shelly.definition.additional_inp = 2 - # generated_output_names should be the same as output_names + # _generated_output_names should be the same as output_names assert ( shelly.output_names - == shelly.generated_output_names + == shelly._generated_output_names == ["return_code", "stdout", "stderr", "newfile1"] ) diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 67fbdfe211..569febe48e 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -142,7 +142,7 @@ def run( **kwargs, ): """Run a task.""" - return self.exec_serial(task, rerun=rerun, 
environment=environment) + return self.exec_serial(task) def close(self): """Return whether the task is finished.""" @@ -151,13 +151,14 @@ async def exec_serial( self, task: "Task", rerun: bool = False, environment: Environment | None = None ): if isinstance(task, Task): - return task.run(rerun, environment=environment) + return task.run(rerun=rerun) else: # it could be tuple that includes pickle files with tasks and inputs task_main_pkl, _ = task - return load_and_run(task_main_pkl, rerun, environment=environment) + return load_and_run(task_main_pkl, rerun=rerun) async def fetch_finished(self, futures): - await asyncio.gather(*futures) + for future in futures: + await future return set() # async def fetch_finished(self, futures): @@ -198,7 +199,7 @@ async def exec_as_coro(self, runnable, rerun=False, environment=None): else: # it could be tuple that includes pickle files with tasks and inputs task_main_pkl, task_orig = runnable res = await self.loop.run_in_executor( - self.pool, load_and_run, task_main_pkl, rerun, environment + self.pool, load_and_run, task_main_pkl ) return res @@ -917,11 +918,11 @@ async def exec_dask( async with Client(**self.client_args, asynchronous=True) as client: if isinstance(task, Task): - future = client.submit(task._run, rerun) + future = client.submit(task) result = await future - else: # it could be tuple that includes pickle files with tasks and inputs - ind, task_main_pkl, task_orig = task - future = client.submit(load_and_run, task_main_pkl, ind, rerun) + else: # it could be a path to a pickled task file + assert isinstance(task, Path) + future = client.submit(load_and_run, task) result = await future return result From abcf93f4a7a05d4929a1318a19b626bbffac4ec3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 21 Jan 2025 17:49:21 +1100 Subject: [PATCH 139/342] moved run into PythonDef --- pydra/engine/specs.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 3d1b1d0688..5c104b1aaa 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -566,7 +566,27 @@ class PythonOutputs(TaskOutputs): class PythonDef(TaskDef[PythonOutputsType]): - pass + + def _run(self, environment=None): + inputs = attrs_values(self) + del inputs["function"] + self.output_ = None + output = self.function(**inputs) + output_names = [f.name for f in attrs.fields(self.Outputs)] + if output is None: + self.output_ = {nm: None for nm in output_names} + elif len(output_names) == 1: + # if only one element in the fields, everything should be returned together + self.output_ = {output_names[0]: output} + elif isinstance(output, tuple) and len(output_names) == len(output): + self.output_ = dict(zip(output_names, output)) + elif isinstance(output, dict): + self.output_ = {key: output.get(key, None) for key in output_names} + else: + raise RuntimeError( + f"expected {len(list_fields(self.Outputs))} elements, " + f"but {output} were returned" + ) class WorkflowOutputs(TaskOutputs): From e5c25568b8d5c9757aa72a2f45b7d70bea05a29f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 23 Jan 2025 15:56:24 +1100 Subject: [PATCH 140/342] implemented non-asynchronous "debug" worker that avoids async/await for easier debugging of workflows --- new-docs/source/tutorial/tst.py | 8 +- pydra/design/base.py | 12 +- pydra/engine/core.py | 158 +++++++++++--------- pydra/engine/environments.py | 16 +- pydra/engine/graph.py | 41 +++++- pydra/engine/lazy.py | 69 +++++---- pydra/engine/specs.py | 210 
++++++++++++++------------- pydra/engine/submitter.py | 130 ++++++++++++----- pydra/engine/tests/test_submitter.py | 4 +- pydra/engine/workers.py | 25 ++-- 10 files changed, 384 insertions(+), 289 deletions(-) diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 01cd176b4d..2da864d279 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -21,10 +21,12 @@ # Instantiate the task definition, "splitting" over all NIfTI files in the test directory # by splitting the "input" input field over all files in the directory - mrgrid = MrGrid(voxel=(0.5, 0.5, 0.5)).split(in_file=nifti_dir.iterdir()) + mrgrid = MrGrid(operation="regrid", voxel=(0.5, 0.5, 0.5)).split( + in_file=nifti_dir.iterdir() + ) # Run the task to resample all NIfTI files - outputs = mrgrid(worker="serial") + outputs = mrgrid(worker="cf") # Print the locations of the output files - print("\n".join(str(p) for p in outputs.outputs)) + print("\n".join(str(p) for p in outputs.out_file)) diff --git a/pydra/design/base.py b/pydra/design/base.py index e2080225bc..3ead73933c 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -388,8 +388,7 @@ def make_task_def( klass : type The class created using the attrs package """ - from pydra.engine.specs import TaskDef, WorkflowDef - from pydra.engine.core import Task, WorkflowTask + from pydra.engine.specs import TaskDef spec_type._check_arg_refs(inputs, outputs) @@ -400,7 +399,6 @@ def make_task_def( f"{reserved_names} are reserved and cannot be used for {spec_type} field names" ) outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) - task_type = WorkflowTask if issubclass(spec_type, WorkflowDef) else Task if klass is None or not issubclass(klass, spec_type): if name is None: raise ValueError("name must be provided if klass is not") @@ -419,19 +417,13 @@ def make_task_def( name=name, bases=bases, kwds={}, - exec_body=lambda ns: ns.update( - { - "Outputs": outputs_klass, - "Task": task_type, - } - ), + exec_body=lambda ns: ns.update({"Outputs": outputs_klass}), ) else: # Ensure that the class has it's own annotations dict so we can modify it without # messing up other classes klass.__annotations__ = copy(klass.__annotations__) klass.Outputs = outputs_klass - klass.Task = task_type # Now that we have saved the attributes in lists to be for arg in inputs.values(): # If an outarg input then the field type should be Path not a FileSet diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 4db3f009d0..d6f4c2ac88 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -43,6 +43,7 @@ ) from .helpers_file import copy_nested_files, template_update from pydra.utils.messenger import AuditFlag +from pydra.engine.environments import Environment, Native logger = logging.getLogger("pydra") @@ -85,7 +86,8 @@ class Task(ty.Generic[DefType]): name: str definition: DefType - submitter: "Submitter" + submitter: "Submitter | None" + environment: "Environment | None" state_index: state.StateIndex _inputs: dict[str, ty.Any] | None = None @@ -95,6 +97,7 @@ def __init__( definition: DefType, submitter: "Submitter", name: str, + environment: "Environment | None" = None, state_index: "state.StateIndex | None" = None, ): """ @@ -121,14 +124,16 @@ def __init__( if state_index is None: state_index = state.StateIndex() - self.definition = definition + # Copy the definition, so lazy fields can be resolved and replaced at runtime + self.definition = copy(definition) + # We save the submitter is the definition is a 
workflow otherwise we don't + # so the task can be pickled + self.submitter = submitter if is_workflow(definition) else None + self.environment = environment if environment is not None else Native() self.name = name self.state_index = state_index - # checking if metadata is set properly - self.definition._check_resolved() - self.definition._check_rules() - self._output = {} + self.return_values = {} self._result = {} # flag that says if node finished all jobs self._done = False @@ -151,6 +156,11 @@ def __init__( def cache_dir(self): return self._cache_dir + @property + def is_async(self) -> bool: + """Check to see if the task should be run asynchronously.""" + return self.submitter.worker.is_async and is_workflow(self.definition) + @cache_dir.setter def cache_dir(self, path: os.PathLike): self._cache_dir = Path(path) @@ -315,6 +325,21 @@ def _populate_filesystem(self): self.output_dir.mkdir(parents=False, exist_ok=self.can_resume) def run(self, rerun: bool = False): + """Prepare the task working directory, execute the task definition, and save the + results. + + Parameters + ---------- + rerun : bool + If True, the task will be re-run even if a result already exists. Will + propagated to all tasks within workflow tasks. + """ + # TODO: After these changes have been merged, will refactor this function and + # run_async to use common helper methods for pre/post run tasks + + # checking if the definition is fully resolved and ready to run + self.definition._check_resolved() + self.definition._check_rules() self.hooks.pre_run(self) logger.debug( "'%s' is attempting to acquire lock on %s", self.name, self.lockfile @@ -334,8 +359,8 @@ def run(self, rerun: bool = False): self.audit.audit_task(task=self) try: self.audit.monitor() - run_outputs = self.definition._run(self) - result.outputs = self.definition.Outputs.from_task(self, run_outputs) + self.definition._run(self) + result.outputs = self.definition.Outputs._from_task(self) except Exception: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) @@ -355,6 +380,57 @@ def run(self, rerun: bool = False): self._check_for_hash_changes() return result + async def run_async(self, rerun: bool = False): + """Prepare the task working directory, execute the task definition asynchronously, + and save the results. NB: only workflows are run asynchronously at the moment. + + Parameters + ---------- + rerun : bool + If True, the task will be re-run even if a result already exists. Will + propagated to all tasks within workflow tasks. 
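Both run() and run_async() follow the same check-cache, acquire-lockfile, re-check, execute, save sequence. A minimal file-based sketch of that pattern, assuming only the filelock package the engine already uses for its lockfiles (paths and checksum are placeholders):

    from pathlib import Path
    from filelock import SoftFileLock

    def cached_run(cache_dir: Path, checksum: str, compute):
        cache_dir.mkdir(parents=True, exist_ok=True)
        resultfile = cache_dir / checksum / "_result.txt"
        if resultfile.exists():                          # cheap check before taking the lock
            return resultfile.read_text()
        with SoftFileLock(str(cache_dir / f"{checksum}.lock")):
            if resultfile.exists():                      # re-check once the lock is held
                return resultfile.read_text()
            resultfile.parent.mkdir(parents=True, exist_ok=True)
            value = compute()                            # stand-in for definition._run(...)
            resultfile.write_text(value)
            return value

    print(cached_run(Path("/tmp/pydra-demo"), "abc123", lambda: "done"))
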
+ """ + # checking if the definition is fully resolved and ready to run + self.definition._check_resolved() + self.definition._check_rules() + self.hooks.pre_run(self) + logger.debug( + "'%s' is attempting to acquire lock on %s", self.name, self.lockfile + ) + async with PydraFileLock(self.lockfile): + if not rerun: + result = self.result() + if result is not None and not result.errored: + return result + cwd = os.getcwd() + self._populate_filesystem() + result = Result(outputs=None, runtime=None, errored=False, task=self) + self.hooks.pre_run_task(self) + self.audit.start_audit(odir=self.output_dir) + try: + self.audit.monitor() + await self.definition._run(self) + result.outputs = self.definition.Outputs._from_task(self) + except Exception: + etype, eval, etr = sys.exc_info() + traceback = format_exception(etype, eval, etr) + record_error(self.output_dir, error=traceback) + result.errored = True + self._errored = True + raise + finally: + self.hooks.post_run_task(self, result) + self.audit.finalize_audit(result=result) + save(self.output_dir, result=result, task=self) + # removing the additional file with the checksum + (self.cache_dir / f"{self.uid}_info.json").unlink() + os.chdir(cwd) + self.hooks.post_run(self, result) + # Check for any changes to the input hashes that have occurred during the execution + # of the task + self._check_for_hash_changes() + return result + def pickle_task(self): """Pickling the tasks with full inputs""" pkl_files = self.cache_dir / "pkl_files" @@ -398,7 +474,7 @@ def _combined_output(self, return_inputs=False): else: return combined_results - def result(self, state_index=None, return_inputs=False): + def result(self, return_inputs=False): """ Retrieve the outcomes of this particular task. @@ -415,13 +491,9 @@ def result(self, state_index=None, return_inputs=False): result : Result the result of the task """ - # TODO: check if result is available in load_result and - # return a future if not if self.errored: return Result(outputs=None, runtime=None, errored=True, task=self) - if state_index is not None: - raise ValueError("Task does not have a state") checksum = self.checksum result = load_result(checksum, self.cache_locations) if result and result.errored: @@ -483,58 +555,6 @@ def _check_for_hash_changes(self): DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any -class WorkflowTask(Task): - - def __init__( - self, - definition: DefType, - submitter: "Submitter", - name: str, - state_index: "state.StateIndex | None" = None, - ): - super().__init__(definition, submitter, name, state_index) - self.submitter = submitter - - async def run(self, rerun: bool = False): - self.hooks.pre_run(self) - logger.debug( - "'%s' is attempting to acquire lock on %s", self.name, self.lockfile - ) - async with PydraFileLock(self.lockfile): - if not rerun: - result = self.result() - if result is not None and not result.errored: - return result - cwd = os.getcwd() - self._populate_filesystem() - result = Result(outputs=None, runtime=None, errored=False, task=self) - self.hooks.pre_run_task(self) - self.audit.start_audit(odir=self.output_dir) - try: - self.audit.monitor() - await self.submitter.expand_workflow(self) - result.outputs = self.definition.Outputs.from_task(self) - except Exception: - etype, eval, etr = sys.exc_info() - traceback = format_exception(etype, eval, etr) - record_error(self.output_dir, error=traceback) - result.errored = True - self._errored = True - raise - finally: - self.hooks.post_run_task(self, result) - self.audit.finalize_audit(result=result) - 
save(self.output_dir, result=result, task=self) - # removing the additional file with the checksum - (self.cache_dir / f"{self.uid}_info.json").unlink() - os.chdir(cwd) - self.hooks.post_run(self, result) - # Check for any changes to the input hashes that have occurred during the execution - # of the task - self._check_for_hash_changes() - return result - - logger = logging.getLogger("pydra") OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) @@ -847,18 +867,12 @@ def create_dotfile(self, type="simple", export=None, name=None, output_dir=None) return dotfile, formatted_dot -def is_task(obj): - """Check whether an object looks like a task.""" - return hasattr(obj, "_run_task") - - def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" from pydra.engine.specs import WorkflowDef from pydra.engine.core import Workflow - from pydra.engine.core import WorkflowTask - return isinstance(obj, (WorkflowDef, WorkflowTask, Workflow)) + return isinstance(obj, (WorkflowDef, Workflow)) def has_lazy(obj): diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 1481e14f7b..f0d1d9ee9b 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -3,7 +3,7 @@ from pathlib import Path if ty.TYPE_CHECKING: - from pydra.engine.task import ShellTask + from pydra.engine.core import Task class Environment: @@ -17,7 +17,7 @@ class Environment: def setup(self): pass - def execute(self, task: "ShellTask") -> dict[str, ty.Any]: + def execute(self, task: "Task") -> dict[str, ty.Any]: """ Execute the task in the environment. @@ -42,12 +42,12 @@ class Native(Environment): Native environment, i.e. the tasks are executed in the current python environment. """ - def execute(self, task: "ShellTask") -> dict[str, ty.Any]: + def execute(self, task: "Task") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] - values = execute(task.definition._command_args(), strip=task.strip) + values = execute(task.definition._command_args()) output = dict(zip(keys, values)) if output["return_code"]: - msg = f"Error running '{task.name}' task with {task.command_args()}:" + msg = f"Error running '{task.name}' task with {task.definition._command_args()}:" if output["stderr"]: msg += "\n\nstderr:\n" + output["stderr"] if output["stdout"]: @@ -90,7 +90,7 @@ def bind(self, loc, mode="ro"): class Docker(Container): """Docker environment.""" - def execute(self, task: "ShellTask") -> dict[str, ty.Any]: + def execute(self, task: "Task") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.definition._get_bindings(root=self.root) @@ -112,7 +112,6 @@ def execute(self, task: "ShellTask") -> dict[str, ty.Any]: values = execute( docker_args + [docker_img] + task.definition._command_args(root=self.root), - strip=task.strip, ) output = dict(zip(keys, values)) if output["return_code"]: @@ -126,7 +125,7 @@ def execute(self, task: "ShellTask") -> dict[str, ty.Any]: class Singularity(Container): """Singularity environment.""" - def execute(self, task: "ShellTask") -> dict[str, ty.Any]: + def execute(self, task: "Task") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.definition._get_bindings(root=self.root) @@ -151,7 +150,6 @@ def execute(self, task: "ShellTask") -> dict[str, ty.Any]: singularity_args + [singularity_img] + task.definition._command_args(root=self.root), - strip=task.strip, ) output = dict(zip(keys, values)) if output["return_code"]: diff --git 
a/pydra/engine/graph.py b/pydra/engine/graph.py index 25b8ef6a74..4a3beca3aa 100644 --- a/pydra/engine/graph.py +++ b/pydra/engine/graph.py @@ -2,15 +2,28 @@ from copy import copy from pathlib import Path +import typing as ty import subprocess as sp from .helpers import ensure_list -class DiGraph: +NodeType = ty.TypeVar("NodeType") + + +class DiGraph(ty.Generic[NodeType]): """A simple Directed Graph object.""" - def __init__(self, name=None, nodes=None, edges=None): + name: str + nodes: list[NodeType] + edges: list[tuple[NodeType, NodeType]] + + def __init__( + self, + name: str | None = None, + nodes: ty.Iterable[NodeType] | None = None, + edges: ty.Iterable[tuple[NodeType, NodeType]] | None = None, + ): """ Initialize a directed graph. @@ -32,6 +45,7 @@ def __init__(self, name=None, nodes=None, edges=None): self._sorted_nodes = None self._node_wip = [] self._nodes_details = {} + self._node_lookup = {} def copy(self): """ @@ -59,20 +73,31 @@ def copy(self): return new_graph @property - def nodes(self): + def nodes(self) -> list[NodeType]: """Get a list of the nodes currently contained in the graph.""" return self._nodes @nodes.setter - def nodes(self, nodes): + def nodes(self, nodes: ty.Iterable[NodeType]) -> None: if nodes: nodes = ensure_list(nodes) if len(set(nodes)) != len(nodes): raise Exception("nodes have repeated elements") self._nodes = nodes + def node(self, name: str) -> NodeType: + """Get a node by its name, caching the lookup directory""" + try: + return self._node_lookup[name] + except KeyError: + self._node_lookup = self.nodes_names_map + try: + return self._node_lookup[name] + except KeyError: + raise KeyError(f"Node {name!r} not found in graph") from None + @property - def nodes_names_map(self): + def nodes_names_map(self) -> dict[str, NodeType]: """Get a map of node names to nodes.""" return {nd.name: nd for nd in self.nodes} @@ -257,6 +282,8 @@ def remove_nodes(self, nodes, check_ready=True): self._sorted_nodes.remove(nd) # starting from the previous sorted list, so is faster self.sorting(presorted=self.sorted_nodes) + # Reset the node lookup + self._node_lookup = {} def remove_nodes_connections(self, nodes): """ @@ -278,6 +305,8 @@ def remove_nodes_connections(self, nodes): self.successors.pop(nd.name) self.predecessors.pop(nd.name) self._node_wip.remove(nd) + # Reset the node lookup + self._node_lookup = {} def remove_previous_connections(self, nodes): """ @@ -300,6 +329,8 @@ def remove_previous_connections(self, nodes): self.successors.pop(nd.name) self.predecessors.pop(nd.name) self._node_wip.remove(nd) + # Reset the node lookup + self._node_lookup = {} def _checking_successors_nodes(self, node, remove=True): if self.successors[node.name]: diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 6c8ddce71f..c1b0a8820d 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -7,7 +7,9 @@ from . import node if ty.TYPE_CHECKING: - from .base import Workflow + from .graph import DiGraph + from .submitter import NodeExecution + from .core import Task, Workflow T = ty.TypeVar("T") @@ -124,7 +126,9 @@ class LazyOutField(LazyField[T]): def name(self) -> str: return self.node.name - def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: + def get_value( + self, graph: "DiGraph[NodeExecution]", state_index: ty.Optional[int] = None + ) -> ty.Any: """Return the value of a lazy field. Parameters @@ -143,49 +147,44 @@ def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty. 
TypeParser, ) # pylint: disable=import-outside-toplevel - result = self.node.result(state_index=state_index) - if result is None: - raise RuntimeError( - f"Could not find results of '{self.node.name}' node in a sub-directory " - f"named '{self.node.checksum}' in any of the cache locations.\n" - + "\n".join(str(p) for p in set(self.node.cache_locations)) - + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. " - f"Current values and hashes: {self.node.inputs}, " - f"{self.node.inputs._hashes}\n\n" - "Set loglevel to 'debug' in order to track hash changes " - "throughout the execution of the workflow.\n\n " - "These issues may have been caused by `bytes_repr()` methods " - "that don't return stable hash values for specific object " - "types across multiple processes (see bytes_repr() " - '"singledispatch "function in pydra/utils/hash.py).' - "You may need to write specific `bytes_repr()` " - "implementations (see `pydra.utils.hash.register_serializer`) or a " - "`__bytes_repr__()` dunder methods to handle one or more types in " - "your interface inputs." - ) + task = graph.node(self.node.name).task(state_index) _, split_depth = TypeParser.strip_splits(self.type) - def get_nested_results(res, depth: int): - if isinstance(res, list): - if not depth: - val = [r.get_output_field(self.field) for r in res] - else: - val = StateArray[self.type]( - get_nested_results(res=r, depth=depth - 1) for r in res - ) + def get_nested(task: "Task", depth: int): + if isinstance(task, StateArray): + val = [get_nested(task=t, depth=depth - 1) for t in task] + if depth: + val = StateArray[self.type](val) else: - if res.errored: + if task.errored: raise ValueError( f"Cannot retrieve value for {self.field} from {self.name} as " "the node errored" ) + res = task.result() + if res is None: + raise RuntimeError( + f"Could not find results of '{task.name}' node in a sub-directory " + f"named '{{{task.checksum}}}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(task.cache_locations)) + + f"\n\nThis is likely due to hash changes in '{task.name}' node inputs. " + f"Current values and hashes: {task.inputs}, " + f"{task.definition._hash}\n\n" + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' + "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) or a " + "`__bytes_repr__()` dunder methods to handle one or more types in " + "your interface inputs." 
+ ) val = res.get_output_field(self.field) - if depth and not wf._pre_split: - assert isinstance(val, ty.Sequence) and not isinstance(val, str) - val = StateArray[self.type](val) return val - value = get_nested_results(result, depth=split_depth) + value = get_nested(task, depth=split_depth) value = self._apply_cast(value) return value diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5c104b1aaa..6898b92746 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -37,6 +37,9 @@ if ty.TYPE_CHECKING: from pydra.engine.core import Task + from pydra.engine.graph import DiGraph + from pydra.engine.submitter import NodeExecution + from pydra.engine.lazy import LazyOutField from pydra.engine.task import ShellTask from pydra.engine.core import Workflow from pydra.engine.environments import Environment @@ -53,33 +56,25 @@ class TaskOutputs: RESERVED_FIELD_NAMES = ("inputs",) - @classmethod - def from_task(cls, task: "Task") -> Self: - """Collect the outputs of a task from a combination of the provided inputs, - the objects in the output directory, and the stdout and stderr of the process. - - Parameters - ---------- - task : Task - The task whose outputs are being collected. + @property + def inputs(self): + """The inputs object associated with a lazy-outputs object""" + return self._get_node().inputs - Returns - ------- - outputs : Outputs - The outputs of the task - """ + @classmethod + def _from_defaults(cls) -> Self: + """Create an output object from the default values of the fields""" return cls( **{ - f.name: task.output_.get(f.name, attrs.NOTHING) + f.name: ( + f.default.factory() + if isinstance(f.default, attrs.Factory) + else f.default + ) for f in attrs_fields(cls) } ) - @property - def inputs(self): - """The inputs object associated with a lazy-outputs object""" - return self._get_node().inputs - def _get_node(self): try: return self._node @@ -88,12 +83,8 @@ def _get_node(self): f"{self} outputs object is not a lazy output of a workflow node" ) from None - def __iter__(self) -> ty.Generator[str, None, None]: - """Iterate through all the names in the definition""" - return (f.name for f in list_fields(self)) - def __getitem__(self, name: str) -> ty.Any: - """Return the value for the given attribute, resolving any templates + """Return the value for the given attribute Parameters ---------- @@ -118,8 +109,6 @@ def __getitem__(self, name: str) -> ty.Any: class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" - Task: "ty.Type[core.Task]" - # The following fields are used to store split/combine state information _splitter = attrs.field(default=None, init=False, repr=False) _combiner = attrs.field(default=None, init=False, repr=False) @@ -387,7 +376,7 @@ def _retrieve_values(self, wf, state_index=None): """Parse output results.""" temp_values = {} for field in attrs_fields(self): - value = getattr(self, field.name) + value: "LazyOutField" = getattr(self, field.name) if is_lazy(value): temp_values[field.name] = value.get_value(wf, state_index=state_index) for field, val in temp_values.items(): @@ -397,43 +386,36 @@ def _check_rules(self): """Check if all rules are satisfied.""" field: Arg + errors = [] for field in list_fields(self): value = getattr(self, field.name) if is_lazy(value): continue + if value is attrs.NOTHING: + errors.append(f"Mandatory field {field.name!r} is not set") + # Collect alternative fields associated with this field. 
if field.xor: - alternative_fields = { - name: getattr(self, name) - for name in field.xor - if name != field.name - } - set_alternatives = {n: v for n, v in alternative_fields.items() if v} - - # Raise error if no field in mandatory alternative group is set. - if not is_set(value): - if set_alternatives: - continue - message = f"{field.name} is mandatory and unset." - if alternative_fields: - raise AttributeError( - message[:-1] - + f", and no alternative provided in {list(alternative_fields)}." - ) - else: - raise AttributeError(message) - - # Raise error if multiple alternatives are set. - elif set_alternatives: - raise AttributeError( - f"{field.name} is mutually exclusive with {set_alternatives}" + mutually_exclusive = {name: getattr(self, name) for name in field.xor} + are_set = [ + f"{n}={v!r}" for n, v in mutually_exclusive.items() if v is not None + ] + if len(are_set) > 1: + errors.append( + f"Mutually exclusive fields {field.xor} are set together: " + + ", ".join(are_set) + ) + elif not are_set: + errors.append( + f"At least one of the mutually exclusive fields {field.xor} " + f"should be set" ) # Raise error if any required field is unset. if ( - value + value is not None and field.requires and not any(rs.satisfied(self) for rs in field.requires) ): @@ -443,9 +425,14 @@ def _check_rules(self): ) else: qualification = "" - raise ValueError( + errors.append( f"{field.name!r} requires{qualification} {[str(r) for r in field.requires]}" ) + if errors: + raise ValueError( + f"Found the following errors in task {self} definition:\n" + + "\n".join(errors) + ) @classmethod def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: @@ -472,10 +459,10 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: def _check_resolved(self): """Checks that all the fields in the definition have been resolved""" - if has_lazy_values := [n for n, v in attrs_values(self).items() if is_lazy(v)]: + if lazy_values := [n for n, v in attrs_values(self).items() if is_lazy(v)]: raise ValueError( f"Cannot execute {self} because the following fields " - f"still have lazy values {has_lazy_values}" + f"still have lazy values {lazy_values}" ) @@ -559,7 +546,28 @@ class RuntimeSpec: class PythonOutputs(TaskOutputs): - pass + + @classmethod + def _from_task(cls, task: "Task") -> Self: + """Collect the outputs of a task from a combination of the provided inputs, + the objects in the output directory, and the stdout and stderr of the process. + + Parameters + ---------- + task : Task + The task whose outputs are being collected. 
+ outputs_dict : dict[str, ty.Any] + The outputs of the task, as a dictionary + + Returns + ------- + outputs : Outputs + The outputs of the task in dataclass + """ + outputs = cls._from_defaults() + for name, val in task.return_values.items(): + setattr(outputs, name, val) + return outputs PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) @@ -567,32 +575,34 @@ class PythonOutputs(TaskOutputs): class PythonDef(TaskDef[PythonOutputsType]): - def _run(self, environment=None): + def _run(self, task: "Task") -> None: + # Prepare the inputs to the function inputs = attrs_values(self) del inputs["function"] - self.output_ = None - output = self.function(**inputs) - output_names = [f.name for f in attrs.fields(self.Outputs)] - if output is None: - self.output_ = {nm: None for nm in output_names} - elif len(output_names) == 1: + # Run the actual function + returned = self.function(**inputs) + # Collect the outputs and save them into the task.return_values dictionary + self.return_values = {f.name: f.default for f in attrs.fields(self.Outputs)} + return_names = list(self.return_values) + if returned is None: + self.return_values = {nm: None for nm in return_names} + elif len(self.return_values) == 1: # if only one element in the fields, everything should be returned together - self.output_ = {output_names[0]: output} - elif isinstance(output, tuple) and len(output_names) == len(output): - self.output_ = dict(zip(output_names, output)) - elif isinstance(output, dict): - self.output_ = {key: output.get(key, None) for key in output_names} + self.return_values = {list(self.return_values)[0]: returned} + elif isinstance(returned, tuple) and len(return_names) == len(returned): + self.return_values = dict(zip(return_names, returned)) + elif isinstance(returned, dict): + self.return_values = {key: returned.get(key, None) for key in return_names} else: raise RuntimeError( - f"expected {len(list_fields(self.Outputs))} elements, " - f"but {output} were returned" + f"expected {len(return_names)} elements, but {returned} were returned" ) class WorkflowOutputs(TaskOutputs): @classmethod - def from_task(cls, task: "Task") -> Self: + def _from_task(cls, task: "Task") -> Self: """Collect the outputs of a workflow task from the outputs of the nodes in the Parameters @@ -605,24 +615,28 @@ def from_task(cls, task: "Task") -> Self: outputs : Outputs The outputs of the task """ - outputs = super().from_task(task) - wf = task.definition.construct() + outputs = cls._from_defaults() # collecting outputs from tasks output_wf = {} - for name, lazy_field in wf.outputs.items(): + lazy_field: lazy.LazyOutField + workflow: "Workflow" = task.return_values["workflow"] + exec_graph: "DiGraph[NodeExecution]" = task.return_values["exec_graph"] + nodes_dict = {n.name: n for n in exec_graph.nodes} + for name, lazy_field in attrs_values(workflow.outputs).items(): try: - val_out = lazy_field.get_value(wf) + val_out = lazy_field.get_value(exec_graph) output_wf[name] = val_out - except (ValueError, AttributeError) as e: + except (ValueError, AttributeError): output_wf[name] = None - node = wf[lazy_field.name] + node: "NodeExecution" = nodes_dict[lazy_field.name] # checking if the tasks has predecessors that raises error - if isinstance(node._errored, list): + if isinstance(node.errored, list): raise ValueError(f"Tasks {node._errored} raised an error") else: err_files = [(t.output_dir / "_error.pklz") for t in node.tasks] - if not all(err_files): - raise e + err_files = [f for f in err_files if f.exists()] + if not 
err_files: + raise raise ValueError( f"Task {lazy_field.name} raised an error, full crash report is " f"here: " @@ -645,6 +659,13 @@ class WorkflowDef(TaskDef[WorkflowOutputsType]): _constructed = attrs.field(default=None, init=False) + def _run(self, task: "Task") -> None: + """Run the workflow.""" + if task.submitter.worker.is_async: + task.submitter.expand_workflow_async(task) + else: + task.submitter.expand_workflow(task) + def construct(self) -> "Workflow": from pydra.engine.core import Workflow @@ -667,10 +688,7 @@ class ShellOutputs(TaskOutputs): stderr: str = shell.out(help=STDERR_HELP) @classmethod - def from_task( - cls, - task: "ShellTask", - ) -> Self: + def _from_task(cls, task: "ShellTask") -> Self: """Collect the outputs of a shell process from a combination of the provided inputs, the objects in the output directory, and the stdout and stderr of the process. @@ -692,23 +710,21 @@ def from_task( outputs : ShellOutputs The outputs of the shell process """ - - outputs = super().from_task(task) - + outputs = cls._from_defaults() fld: shell.out for fld in list_fields(cls): if fld.name in ["return_code", "stdout", "stderr"]: - continue + resolved_value = task.return_values[fld.name] # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - if isinstance(fld, shell.outarg) and is_set( + elif isinstance(fld, shell.outarg) and is_set( getattr(task.definition, fld.name) ): resolved_value = task.inputs[fld.name] elif is_set(fld.default): resolved_value = cls._resolve_default_value(fld, task.output_dir) else: - resolved_value = cls._resolve_value(fld, outputs.stdout, outputs.stderr) + resolved_value = cls._resolve_value(fld, task) # Set the resolved value setattr(outputs, fld.name, resolved_value) return outputs @@ -769,7 +785,6 @@ def _resolve_value( cls, fld: "shell.out", task: "Task", - outputs: dict[str, ty.Any], ) -> ty.Any: """Collect output file if metadata specified.""" from pydra.design import shell @@ -799,9 +814,9 @@ def _resolve_value( elif argnm == "inputs": call_args_val[argnm] = task.inputs elif argnm == "stdout": - call_args_val[argnm] = outputs["stdout"] + call_args_val[argnm] = task.return_values["stdout"] elif argnm == "stderr": - call_args_val[argnm] = outputs["stderr"] + call_args_val[argnm] = task.return_values["stderr"] else: try: call_args_val[argnm] = task.inputs[argnm] @@ -827,9 +842,9 @@ class ShellDef(TaskDef[ShellOutputsType]): RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) - def _run(self, environment: "Environment") -> None: + def _run(self, task: "Task") -> None: """Run the shell command.""" - return environment.execute(self) + task.return_values = task.environment.execute(task) @property def cmdline(self) -> str: @@ -1189,6 +1204,3 @@ def argstr_formatting( .strip() ) return argstr_formatted - - -from pydra.engine import core # noqa: E402 diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index a7a6e9e151..df0512e076 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -17,6 +17,7 @@ ) from pydra.utils.hash import PersistentCache from .state import StateIndex +from pydra.utils.typing import StateArray from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger @@ -111,6 +112,7 @@ def __init__( except TypeError as e: e.add_note(WORKER_KWARG_FAIL_NOTE) raise + self.worker_kwargs = kwargs self._worker.loop = self.loop @property @@ -151,11 +153,12 @@ def Split(): f"Task {self} is 
marked for combining, but not splitting. " "Use the `split` method to split the task before combining." ) - task = task_def.Task(task_def, submitter=self, name="task") - print(str(task.output_dir)) - self.loop.run_until_complete(self.expand_runnable(task)) + task = Task(task_def, submitter=self, name="task", environment=self.environment) + if task.is_async: + self.loop.run_until_complete(self.expand_runnable_async(task)) + else: + self.expand_runnable(task) PersistentCache().clean_up() - print(str(task.output_dir)) result = task.result() if result is None: if task.lockfile.exists(): @@ -169,22 +172,49 @@ def Split(): raise RuntimeError(f"Task {task} has no result in {str(task.output_dir)!r}") return result - # def __getstate__(self): - # state = self.__dict__.copy() - # # Remove the unpicklable entries or those that should not be pickled - # # When unpickled (in another process) the submitter can't be called - # state["loop"] = None - # state["_worker"] = None - # return state - - # def __setstate__(self, state): - # self.__dict__.update(state) - # # Restore the loop and worker - # self.loop = get_open_loop() - # self.worker = WORKERS[self.plugin](**self.worker.__dict__) - # self.worker.loop = self.loop - - async def expand_runnable(self, task: "Task", wait=False): + def __getstate__(self): + state = self.__dict__.copy() + # Remove the unpicklable entries or those that should not be pickled + # When unpickled (in another process) the submitter can't be called + state["loop"] = None + state["_worker"] = None + return state + + def __setstate__(self, state): + self.__dict__.update(state) + # Restore the loop and worker + self.loop = get_open_loop() + self._worker = WORKERS[self.worker_name](**self.worker_kwargs) + self.worker.loop = self.loop + + def expand_runnable(self, task: "Task"): + """ + This coroutine handles state expansion. + + Removes any states from `runnable`. If `wait` is + set to False (default), aggregates all worker + execution coroutines and returns them. If `wait` is + True, waits for all coroutines to complete / error + and returns None. + + Parameters + ---------- + runnable : pydra Task + Task instance (`Task`, `Workflow`) + wait : bool (False) + Await all futures before completing + + Returns + ------- + futures : set or None + Coroutines for :class:`~pydra.engine.core.TaskBase` execution. + + """ + task.run(rerun=self.rerun) + + async def expand_runnable_async( + self, task: "Task", wait=False + ) -> set[ty.Coroutine] | None: """ This coroutine handles state expansion. @@ -209,7 +239,7 @@ async def expand_runnable(self, task: "Task", wait=False): """ futures = set() - if is_workflow(task): + if is_workflow(task.definition): futures.add(asyncio.create_task(task.run(self.rerun))) else: task_pkl = await prepare_runnable(task) @@ -223,21 +253,39 @@ async def expand_runnable(self, task: "Task", wait=False): # pass along futures to be awaited independently return futures - async def expand_workflow(self, task: "Task[WorkflowDef]"): - """ - Expand and execute a stateless :class:`~pydra.engine.core.Workflow`. - This method is only reached by `Workflow._run_task`. + def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: + """Expands and executes a workflow task synchronously. Typically only used during + debugging and testing, as the asynchronous version is more efficient. 
Parameters ---------- - task : :obj:`~pydra.engine.core.WorkflowTask` + task : :obj:`~pydra.engine.core.Task[WorkflowDef]` Workflow Task object - Returns - ------- - wf : :obj:`pydra.engine.workflow.Workflow` - The computed workflow + """ + # Construct the workflow + wf = workflow_task.definition.construct() + # Generate the execution graph + exec_graph = wf.execution_graph(submitter=self) + tasks = self.get_runnable_tasks(exec_graph) + while tasks or any(not n.done for n in exec_graph.nodes): + for task in tasks: + # grab inputs if needed + logger.debug(f"Retrieving inputs for {task}") + # TODO: add state idx to retrieve values to reduce waiting + task.definition._retrieve_values(wf) + self.worker.run(task, rerun=self.rerun) + tasks = self.get_runnable_tasks(exec_graph) + workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} + async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: + """ + Expand and execute a workflow task asynchronously. + + Parameters + ---------- + task : :obj:`~pydra.engine.core.Task[WorkflowDef]` + Workflow Task object """ wf = task.definition.construct() # Generate the execution graph @@ -319,7 +367,7 @@ async def expand_workflow(self, task: "Task[WorkflowDef]"): task_futures.add(self.worker.run(task, rerun=self.rerun)) task_futures = await self.worker.fetch_finished(task_futures) tasks = self.get_runnable_tasks(exec_graph) - return wf + task.return_values = {"workflow": wf, "exec_graph": exec_graph} def __enter__(self): return self @@ -421,9 +469,13 @@ def __init__(self, node: "Node", submitter: Submitter): self.unrunnable = defaultdict(list) self.state_names = self.node.state.names - def __getattr__(self, name: str) -> ty.Any: - """Delegate attribute access to the underlying node.""" - return getattr(self.node, name) + @property + def inputs(self) -> "Node.Inputs": + return self.node.inputs + + @property + def _definition(self) -> "Node": + return self.node._definition @property def tasks(self) -> ty.Iterable["Task"]: @@ -431,16 +483,14 @@ def tasks(self) -> ty.Iterable["Task"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def task(self, index: StateIndex | None = None) -> "Task": + def task(self, index: StateIndex | None = None) -> "Task | list[Task]": """Get a task object for a given state index.""" self.tasks # Ensure tasks are loaded try: return self._tasks[index] except KeyError: if index is None: - raise KeyError( - f"{self!r} has been split, so a state index must be provided" - ) from None + return StateArray(self._tasks.values()) raise @property @@ -465,14 +515,14 @@ def all_failed(self) -> bool: def _generate_tasks(self) -> ty.Iterable["Task"]: if self.node.state is None: - yield self.node._definition.Task( + yield Task( definition=self.node._definition, submitter=self.submitter, name=self.node.name, ) else: for index, split_defn in self.node._split_definition().items(): - yield self.node._definition.Task( + yield Task( definition=split_defn, submitter=self.submitter, name=self.node.name, diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index d7dbb6ad03..ef0c898000 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -19,7 +19,7 @@ ) from ..core import Task from ..submitter import Submitter -from ..workers import SerialWorker +from ..workers import DebugWorker from pydra.design import python from pathlib import Path from datetime import datetime @@ -675,7 +675,7 @@ def to_tuple(x, y): 
return (x, y) -class BYOAddVarWorker(SerialWorker): +class BYOAddVarWorker(DebugWorker): """A dummy worker that adds 1 to the output of the task""" plugin_name = "byo_add_env_var" diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 569febe48e..766b585f1e 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -29,6 +29,9 @@ class Worker: """A base class for execution of tasks.""" + plugin_name: str + is_async: bool = True + def __init__(self, loop=None): """Initialize the worker.""" logger.debug(f"Initializing {self.__class__.__name__}") @@ -125,10 +128,11 @@ async def fetch_finished(self, futures): return pending.union(unqueued) -class SerialWorker(Worker): +class DebugWorker(Worker): """A worker to execute linearly.""" - plugin_name = "serial" + plugin_name: str = "debug" + is_async: bool = False def __init__(self, **kwargs): """Initialize worker.""" @@ -136,26 +140,19 @@ def __init__(self, **kwargs): def run( self, - task: "Task", + task: "Task | tuple[Path, Task]", rerun: bool = False, - environment: Environment | None = None, - **kwargs, ): """Run a task.""" - return self.exec_serial(task) - - def close(self): - """Return whether the task is finished.""" - - async def exec_serial( - self, task: "Task", rerun: bool = False, environment: Environment | None = None - ): if isinstance(task, Task): return task.run(rerun=rerun) else: # it could be tuple that includes pickle files with tasks and inputs task_main_pkl, _ = task return load_and_run(task_main_pkl, rerun=rerun) + def close(self): + """Return whether the task is finished.""" + async def fetch_finished(self, futures): for future in futures: await future @@ -1091,7 +1088,7 @@ class PsijSlurmWorker(PsijWorker): WORKERS = { w.plugin_name: w for w in ( - SerialWorker, + DebugWorker, ConcurrentFuturesWorker, SlurmWorker, DaskWorker, From 03c7438bafafb16a186b97b3956b9b756cb1140c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 23 Jan 2025 17:25:33 +1100 Subject: [PATCH 141/342] touching up typing of tasks to include TaskDef template --- pydra/engine/core.py | 2 +- pydra/engine/environments.py | 9 ++- pydra/engine/helpers.py | 6 +- pydra/engine/lazy.py | 4 +- pydra/engine/specs.py | 35 +++++---- pydra/engine/submitter.py | 148 ++++++----------------------------- pydra/engine/workers.py | 50 +++++------- 7 files changed, 77 insertions(+), 177 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index d6f4c2ac88..949c4373a3 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -409,7 +409,7 @@ async def run_async(self, rerun: bool = False): self.audit.start_audit(odir=self.output_dir) try: self.audit.monitor() - await self.definition._run(self) + await self.definition._run_async(self) result.outputs = self.definition.Outputs._from_task(self) except Exception: etype, eval, etr = sys.exc_info() diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index f0d1d9ee9b..06fd3fdefe 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -4,6 +4,7 @@ if ty.TYPE_CHECKING: from pydra.engine.core import Task + from pydra.engine.specs import ShellDef class Environment: @@ -17,7 +18,7 @@ class Environment: def setup(self): pass - def execute(self, task: "Task") -> dict[str, ty.Any]: + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: """ Execute the task in the environment. @@ -42,7 +43,7 @@ class Native(Environment): Native environment, i.e. the tasks are executed in the current python environment. 
""" - def execute(self, task: "Task") -> dict[str, ty.Any]: + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] values = execute(task.definition._command_args()) output = dict(zip(keys, values)) @@ -90,7 +91,7 @@ def bind(self, loc, mode="ro"): class Docker(Container): """Docker environment.""" - def execute(self, task: "Task") -> dict[str, ty.Any]: + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.definition._get_bindings(root=self.root) @@ -125,7 +126,7 @@ def execute(self, task: "Task") -> dict[str, ty.Any]: class Singularity(Container): """Singularity environment.""" - def execute(self, task: "Task") -> dict[str, ty.Any]: + def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations mounts = task.definition._get_bindings(root=self.root) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 8ce9c72093..a158c5c319 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -25,6 +25,8 @@ PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" +DefType = ty.TypeVar("DefType", bound="TaskDef") + def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]: """Get the fields of a definition, excluding some names.""" @@ -132,7 +134,7 @@ def load_result(checksum, cache_locations): def save( task_path: Path, result: "Result | None" = None, - task: "Task | None" = None, + task: "Task[DefType] | None" = None, name_prefix: str = None, ) -> None: """ @@ -449,7 +451,7 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: from .specs import Result try: - task: Task = load_task(task_pkl=task_pkl) + task: Task[DefType] = load_task(task_pkl=task_pkl) except Exception: if task_pkl.parent.exists(): etype, eval, etr = sys.exc_info() diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index c1b0a8820d..c938833c8f 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -10,9 +10,11 @@ from .graph import DiGraph from .submitter import NodeExecution from .core import Task, Workflow + from .specs import TaskDef T = ty.TypeVar("T") +DefType = ty.TypeVar("DefType", bound="TaskDef") TypeOrAny = ty.Union[type, ty.Any] @@ -150,7 +152,7 @@ def get_value( task = graph.node(self.node.name).task(state_index) _, split_depth = TypeParser.strip_splits(self.type) - def get_nested(task: "Task", depth: int): + def get_nested(task: "Task[DefType]", depth: int): if isinstance(task, StateArray): val = [get_nested(task=t, depth=depth - 1) for t in task] if depth: diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 6898b92746..62e7b6c6db 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -40,12 +40,14 @@ from pydra.engine.graph import DiGraph from pydra.engine.submitter import NodeExecution from pydra.engine.lazy import LazyOutField - from pydra.engine.task import ShellTask from pydra.engine.core import Workflow from pydra.engine.environments import Environment from pydra.engine.workers import Worker +DefType = ty.TypeVar("DefType", bound="TaskDef") + + def is_set(value: ty.Any) -> bool: """Check if a value has been set.""" return value not in (attrs.NOTHING, EMPTY) @@ -372,7 +374,7 @@ def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: } return hash_function(sorted(field_hashes.items())), field_hashes - def _retrieve_values(self, wf, state_index=None): + def _resolve_lazy_fields(self, wf, 
state_index=None): """Parse output results.""" temp_values = {} for field in attrs_fields(self): @@ -482,7 +484,7 @@ class Runtime: class Result(ty.Generic[OutputsType]): """Metadata regarding the outputs of processing.""" - task: "Task" + task: "Task[DefType]" outputs: OutputsType | None = None runtime: Runtime | None = None errored: bool = False @@ -548,13 +550,13 @@ class RuntimeSpec: class PythonOutputs(TaskOutputs): @classmethod - def _from_task(cls, task: "Task") -> Self: + def _from_task(cls, task: "Task[PythonDef]") -> Self: """Collect the outputs of a task from a combination of the provided inputs, the objects in the output directory, and the stdout and stderr of the process. Parameters ---------- - task : Task + task : Task[PythonDef] The task whose outputs are being collected. outputs_dict : dict[str, ty.Any] The outputs of the task, as a dictionary @@ -575,7 +577,7 @@ def _from_task(cls, task: "Task") -> Self: class PythonDef(TaskDef[PythonOutputsType]): - def _run(self, task: "Task") -> None: + def _run(self, task: "Task[PythonDef]") -> None: # Prepare the inputs to the function inputs = attrs_values(self) del inputs["function"] @@ -602,12 +604,12 @@ def _run(self, task: "Task") -> None: class WorkflowOutputs(TaskOutputs): @classmethod - def _from_task(cls, task: "Task") -> Self: + def _from_task(cls, task: "Task[WorkflowDef]") -> Self: """Collect the outputs of a workflow task from the outputs of the nodes in the Parameters ---------- - task : Task + task : Task[WorfklowDef] The task whose outputs are being collected. Returns @@ -659,12 +661,13 @@ class WorkflowDef(TaskDef[WorkflowOutputsType]): _constructed = attrs.field(default=None, init=False) - def _run(self, task: "Task") -> None: + def _run(self, task: "Task[WorkflowDef]") -> None: """Run the workflow.""" - if task.submitter.worker.is_async: - task.submitter.expand_workflow_async(task) - else: - task.submitter.expand_workflow(task) + task.submitter.expand_workflow(task) + + async def _run_async(self, task: "Task[WorkflowDef]") -> None: + """Run the workflow asynchronously.""" + await task.submitter.expand_workflow_async(task) def construct(self) -> "Workflow": from pydra.engine.core import Workflow @@ -688,7 +691,7 @@ class ShellOutputs(TaskOutputs): stderr: str = shell.out(help=STDERR_HELP) @classmethod - def _from_task(cls, task: "ShellTask") -> Self: + def _from_task(cls, task: "Task[ShellDef]") -> Self: """Collect the outputs of a shell process from a combination of the provided inputs, the objects in the output directory, and the stdout and stderr of the process. 
@@ -784,7 +787,7 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: def _resolve_value( cls, fld: "shell.out", - task: "Task", + task: "Task[DefType]", ) -> ty.Any: """Collect output file if metadata specified.""" from pydra.design import shell @@ -842,7 +845,7 @@ class ShellDef(TaskDef[ShellOutputsType]): RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) - def _run(self, task: "Task") -> None: + def _run(self, task: "Task[ShellDef]") -> None: """Run the shell command.""" task.return_values = task.environment.execute(task) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index df0512e076..c059a20ace 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -32,6 +32,8 @@ from .specs import TaskDef, WorkflowDef from .environments import Environment +DefType = ty.TypeVar("DefType", bound="TaskDef") + # Used to flag development mode of Audit develop = False @@ -155,9 +157,9 @@ def Split(): ) task = Task(task_def, submitter=self, name="task", environment=self.environment) if task.is_async: - self.loop.run_until_complete(self.expand_runnable_async(task)) + self.loop.run_until_complete(task.run_async(rerun=self.rerun)) else: - self.expand_runnable(task) + task.run(rerun=self.rerun) PersistentCache().clean_up() result = task.result() if result is None: @@ -187,72 +189,6 @@ def __setstate__(self, state): self._worker = WORKERS[self.worker_name](**self.worker_kwargs) self.worker.loop = self.loop - def expand_runnable(self, task: "Task"): - """ - This coroutine handles state expansion. - - Removes any states from `runnable`. If `wait` is - set to False (default), aggregates all worker - execution coroutines and returns them. If `wait` is - True, waits for all coroutines to complete / error - and returns None. - - Parameters - ---------- - runnable : pydra Task - Task instance (`Task`, `Workflow`) - wait : bool (False) - Await all futures before completing - - Returns - ------- - futures : set or None - Coroutines for :class:`~pydra.engine.core.TaskBase` execution. - - """ - task.run(rerun=self.rerun) - - async def expand_runnable_async( - self, task: "Task", wait=False - ) -> set[ty.Coroutine] | None: - """ - This coroutine handles state expansion. - - Removes any states from `runnable`. If `wait` is - set to False (default), aggregates all worker - execution coroutines and returns them. If `wait` is - True, waits for all coroutines to complete / error - and returns None. - - Parameters - ---------- - runnable : pydra Task - Task instance (`Task`, `Workflow`) - wait : bool (False) - Await all futures before completing - - Returns - ------- - futures : set or None - Coroutines for :class:`~pydra.engine.core.TaskBase` execution. - - """ - futures = set() - - if is_workflow(task.definition): - futures.add(asyncio.create_task(task.run(self.rerun))) - else: - task_pkl = await prepare_runnable(task) - futures.add(self.worker.run((task_pkl, task), rerun=self.rerun)) - - if wait and futures: - # if wait is True, we are at the end of the graph / state expansion. - # Once the remaining jobs end, we will exit `submit_from_call` - await asyncio.gather(*futures) - return - # pass along futures to be awaited independently - return futures - def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: """Expands and executes a workflow task synchronously. Typically only used during debugging and testing, as the asynchronous version is more efficient. 
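# A minimal sketch, assuming only the names introduced in the diffs above (the
# WORKERS registry keyed by plugin_name, and the class-level is_async flag that
# is False for DebugWorker): the Submitter calls expand_workflow() directly for
# synchronous workers and awaits expand_workflow_async() on its event loop
# otherwise.
from pydra.engine.workers import WORKERS

worker_cls = WORKERS["debug"]  # DebugWorker, registered under its plugin_name
if worker_cls.is_async:
    pass  # Submitter awaits expand_workflow_async(workflow_task) on its loop
else:
    pass  # Submitter calls expand_workflow(workflow_task) synchronously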
@@ -273,12 +209,12 @@ def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: # grab inputs if needed logger.debug(f"Retrieving inputs for {task}") # TODO: add state idx to retrieve values to reduce waiting - task.definition._retrieve_values(wf) + task.definition._resolve_lazy_fields(wf) self.worker.run(task, rerun=self.rerun) tasks = self.get_runnable_tasks(exec_graph) workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} - async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: + async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> None: """ Expand and execute a workflow task asynchronously. @@ -287,7 +223,7 @@ async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: task : :obj:`~pydra.engine.core.Task[WorkflowDef]` Workflow Task object """ - wf = task.definition.construct() + wf = workflow_task.definition.construct() # Generate the execution graph exec_graph = wf.execution_graph(submitter=self) # keep track of pending futures @@ -301,7 +237,7 @@ async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: # so try to get_runnable_tasks for another minute ii = 0 while not tasks and exec_graph.nodes: - tasks, follow_err = self.get_runnable_tasks(exec_graph) + tasks = self.get_runnable_tasks(exec_graph) ii += 1 # don't block the event loop! await asyncio.sleep(1) @@ -312,7 +248,7 @@ async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: "results predecessors:\n\n" ) # Get blocked tasks and the predecessors they are waiting on - outstanding: dict[Task, list[Task]] = { + outstanding: dict[Task[DefType], list[Task[DefType]]] = { t: [ p for p in exec_graph.predecessors[t.name] if not p.done ] @@ -359,7 +295,7 @@ async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: # grab inputs if needed logger.debug(f"Retrieving inputs for {task}") # TODO: add state idx to retrieve values to reduce waiting - task.definition._retrieve_values(wf) + task.definition._resolve_lazy_fields(wf) if is_workflow(task): await task.run(self) # single task @@ -367,7 +303,7 @@ async def expand_workflow_async(self, task: "Task[WorkflowDef]") -> None: task_futures.add(self.worker.run(task, rerun=self.rerun)) task_futures = await self.worker.fetch_finished(task_futures) tasks = self.get_runnable_tasks(exec_graph) - task.return_values = {"workflow": wf, "exec_graph": exec_graph} + workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} def __enter__(self): return self @@ -386,10 +322,7 @@ def close(self): if self._own_loop: self.loop.close() - def get_runnable_tasks( - self, - graph: DiGraph, - ) -> tuple[list["Task"], dict["NodeExecution", list[str]]]: + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """Parse a graph and return all runnable tasks. 
Parameters @@ -435,7 +368,7 @@ def cache_dir(self, location): self._cache_dir = Path(self._cache_dir).resolve() -class NodeExecution: +class NodeExecution(ty.Generic[DefType]): """A wrapper around a workflow node containing the execution state of the tasks that are generated from it""" @@ -444,17 +377,17 @@ class NodeExecution: submitter: Submitter # List of tasks that were completed successfully - successful: dict[StateIndex | None, list["Task"]] + successful: dict[StateIndex | None, list["Task[DefType]"]] # List of tasks that failed - errored: dict[StateIndex | None, "Task"] + errored: dict[StateIndex | None, "Task[DefType]"] # List of tasks that couldn't be run due to upstream errors - unrunnable: dict[StateIndex | None, list["Task"]] + unrunnable: dict[StateIndex | None, list["Task[DefType]"]] # List of tasks that are running - running: dict[StateIndex | None, "Task"] + running: dict[StateIndex | None, "Task[DefType]"] # List of tasks that are waiting on other tasks to complete before they can be run - waiting: dict[StateIndex | None, "Task"] + waiting: dict[StateIndex | None, "Task[DefType]"] - _tasks: dict[StateIndex | None, "Task"] | None + _tasks: dict[StateIndex | None, "Task[DefType]"] | None def __init__(self, node: "Node", submitter: Submitter): self.name = node.name @@ -478,12 +411,12 @@ def _definition(self) -> "Node": return self.node._definition @property - def tasks(self) -> ty.Iterable["Task"]: + def tasks(self) -> ty.Iterable["Task[DefType]"]: if self._tasks is None: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def task(self, index: StateIndex | None = None) -> "Task | list[Task]": + def task(self, index: StateIndex | None = None) -> "Task | list[Task[DefType]]": """Get a task object for a given state index.""" self.tasks # Ensure tasks are loaded try: @@ -513,7 +446,7 @@ def all_failed(self) -> bool: self.successful or self.waiting or self.running ) - def _generate_tasks(self) -> ty.Iterable["Task"]: + def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: if self.node.state is None: yield Task( definition=self.node._definition, @@ -529,40 +462,7 @@ def _generate_tasks(self) -> ty.Iterable["Task"]: state_index=index, ) - # if state_index is None: - # # if state_index=None, collecting all results - # if self.node.state.combiner: - # return self._combined_output(return_inputs=return_inputs) - # else: - # results = [] - # for ind in range(len(self.node.state.inputs_ind)): - # checksum = self.checksum_states(state_index=ind) - # result = load_result(checksum, cache_locations) - # if result is None: - # return None - # results.append(result) - # if return_inputs is True or return_inputs == "val": - # return list(zip(self.node.state.states_val, results)) - # elif return_inputs == "ind": - # return list(zip(self.node.state.states_ind, results)) - # else: - # return results - # else: # state_index is not None - # if self.node.state.combiner: - # return self._combined_output(return_inputs=return_inputs)[ - # state_index - # ] - # result = load_result(self.checksum_states(state_index), cache_locations) - # if return_inputs is True or return_inputs == "val": - # return (self.node.state.states_val[state_index], result) - # elif return_inputs == "ind": - # return (self.node.state.states_ind[state_index], result) - # else: - # return result - # else: - # return load_result(self._definition._checksum, cache_locations) - - def get_runnable_tasks(self, graph: DiGraph) -> list["Task"]: + def get_runnable_tasks(self, graph: DiGraph) -> 
list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready to run, can't be run due to upstream errors, or are waiting on other tasks to complete. @@ -579,7 +479,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task"]: runnable : list[NodeExecution] List of tasks that are ready to run """ - runnable: list["Task"] = [] + runnable: list["Task[DefType]"] = [] self.tasks # Ensure tasks are loaded if not self.started: self.waiting = copy(self._tasks) diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 766b585f1e..2c4dc533fc 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -4,13 +4,13 @@ import sys import json import re +import typing as ty from tempfile import gettempdir from pathlib import Path from shutil import copyfile, which - import concurrent.futures as cf - from .core import Task +from .specs import TaskDef from .helpers import ( get_available_cpus, read_and_display_async, @@ -20,11 +20,12 @@ ) import logging -from pydra.engine.environments import Environment import random logger = logging.getLogger("pydra.worker") +DefType = ty.TypeVar("DefType", bound="TaskDef") + class Worker: """A base class for execution of tasks.""" @@ -37,7 +38,7 @@ def __init__(self, loop=None): logger.debug(f"Initializing {self.__class__.__name__}") self.loop = loop - def run(self, task: "Task", **kwargs): + def run(self, task: "Task[DefType]", **kwargs): """Return coroutine for task execution.""" raise NotImplementedError @@ -140,7 +141,7 @@ def __init__(self, **kwargs): def run( self, - task: "Task | tuple[Path, Task]", + task: "Task[DefType] | tuple[Path, Task[DefType]]", rerun: bool = False, ): """Run a task.""" @@ -178,21 +179,18 @@ def __init__(self, n_procs=None): def run( self, - task: "Task", + task: "Task[DefType]", rerun: bool = False, - environment: Environment | None = None, **kwargs, ): """Run a task.""" assert self.loop, "No event loop available to submit tasks" - return self.exec_as_coro(task, rerun=rerun, environment=environment) + return self.exec_as_coro(task, rerun=rerun) - async def exec_as_coro(self, runnable, rerun=False, environment=None): + async def exec_as_coro(self, runnable: "Task[DefType]", rerun: bool = False): """Run a task (coroutine wrapper).""" if isinstance(runnable, Task): - res = await self.loop.run_in_executor( - self.pool, runnable._run, rerun, environment - ) + res = await self.loop.run_in_executor(self.pool, runnable.run, rerun) else: # it could be tuple that includes pickle files with tasks and inputs task_main_pkl, task_orig = runnable res = await self.loop.run_in_executor( @@ -235,9 +233,7 @@ def __init__(self, loop=None, max_jobs=None, poll_delay=1, sbatch_args=None): self.sbatch_args = sbatch_args or "" self.error = {} - def run( - self, task: "Task", rerun: bool = False, environment: Environment | None = None - ): + def run(self, task: "Task[DefType]", rerun: bool = False): """Worker submission API.""" script_dir, batch_script = self._prepare_runscripts(task, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): @@ -467,9 +463,7 @@ def __init__( self.default_qsub_args = default_qsub_args self.max_mem_free = max_mem_free - def run( - self, task: "Task", rerun: bool = False, environment: Environment | None = None - ): # TODO: add env + def run(self, task: "Task[DefType]", rerun: bool = False): # TODO: add env """Worker submission API.""" ( script_dir, @@ -899,17 +893,14 @@ def __init__(self, **kwargs): def run( self, - task: "Task", + task: "Task[DefType]", 
rerun: bool = False, - environment: Environment | None = None, **kwargs, ): """Run a task.""" - return self.exec_dask(task, rerun=rerun, environment=environment) + return self.exec_dask(task, rerun=rerun) - async def exec_dask( - self, task: "Task", rerun: bool = False, environment: Environment | None = None - ): + async def exec_dask(self, task: "Task[DefType]", rerun: bool = False): """Run a task (coroutine wrapper).""" from dask.distributed import Client @@ -950,13 +941,12 @@ def __init__(self, **kwargs): def run( self, - task: "Task", + task: "Task[DefType]", rerun: bool = False, - environment: Environment | None = None, **kwargs, ): """Run a task.""" - return self.exec_psij(task, rerun=rerun, environment=environment) + return self.exec_psij(task, rerun=rerun) def make_spec(self, cmd=None, arg=None): """ @@ -1001,7 +991,9 @@ def make_job(self, definition, attributes): return job async def exec_psij( - self, task: "Task", rerun: bool = False, environment: Environment | None = None + self, + task: "Task[DefType]", + rerun: bool = False, ): """ Run a task (coroutine wrapper). @@ -1025,7 +1017,7 @@ async def exec_psij( cache_dir = task.cache_dir file_path = cache_dir / "runnable_function.pkl" with open(file_path, "wb") as file: - pickle.dump(task._run, file) + pickle.dump(task.run, file) func_path = absolute_path / "run_pickled.py" definition = self.make_spec("python", [func_path, file_path]) else: # it could be tuple that includes pickle files with tasks and inputs From 076dc8482c91a81b0b9524db707a34c77bf652c1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 23 Jan 2025 18:21:11 +1100 Subject: [PATCH 142/342] debugged getting-started tutorial --- .../source/tutorial/1-getting-started.ipynb | 64 ++++++++++++------- new-docs/source/tutorial/tst.py | 32 ++++++---- pydra/engine/core.py | 2 +- pydra/engine/specs.py | 16 ++--- 4 files changed, 70 insertions(+), 44 deletions(-) diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index c6b7d881ab..0cb4402c88 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -54,13 +54,13 @@ "metadata": {}, "outputs": [], "source": [ - "from fileformats.medimage import Nifti\n", + "from fileformats.medimage import Nifti1\n", "\n", "nifti_dir = test_dir / \"nifti\"\n", "nifti_dir.mkdir()\n", "\n", "for i in range(10):\n", - " Nifti.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory" + " Nifti1.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory" ] }, { @@ -116,7 +116,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev128+g1e817743.d20250104\n" + "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev141+g03c7438b.d20250123\n" ] } ], @@ -150,7 +150,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Result(output=LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}), runtime=None, errored=False)\n" + "Result(task=, outputs=LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}), runtime=None, errored=False)\n" ] } ], @@ -201,16 +201,19 @@ "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "Task.__init__() missing 1 required positional argument: 'definition'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m MrGrid(voxel\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Run the task to resample all NIfTI files\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmrgrid\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;66;03m# Print the locations of the output files\u001b[39;00m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(p) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m outputs\u001b[38;5;241m.\u001b[39moutput))\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:299\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, name, audit_flags, cache_dir, cache_locations, messengers, messenger_args, rerun)\u001b[0m\n\u001b[1;32m 296\u001b[0m task_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mTask\n\u001b[1;32m 297\u001b[0m definition \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\n\u001b[0;32m--> 299\u001b[0m task \u001b[38;5;241m=\u001b[39m \u001b[43mtask_type\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mdefinition\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 309\u001b[0m result \u001b[38;5;241m=\u001b[39m task()\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m result\u001b[38;5;241m.\u001b[39merrored:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:708\u001b[0m, in \u001b[0;36mWorkflowTask.__init__\u001b[0;34m(self, definition, name, audit_flags, cache_dir, cache_locations, input_spec, cont_dim, messenger_args, messengers, output_spec, rerun, propagate_rerun, **kwargs)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 663\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 664\u001b[0m definition,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 678\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 679\u001b[0m ):\n\u001b[1;32m 680\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 681\u001b[0m \u001b[38;5;124;03m Initialize a workflow.\u001b[39;00m\n\u001b[1;32m 682\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 705\u001b[0m \n\u001b[1;32m 706\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 708\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 709\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 710\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 711\u001b[0m \u001b[43m \u001b[49m\u001b[43mcont_dim\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcont_dim\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 712\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 713\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 714\u001b[0m \u001b[43m \u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 715\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 716\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 717\u001b[0m \u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 718\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 720\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph \u001b[38;5;241m=\u001b[39m DiGraph(name\u001b[38;5;241m=\u001b[39mname)\n\u001b[1;32m 721\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname2obj \u001b[38;5;241m=\u001b[39m {}\n", - "\u001b[0;31mTypeError\u001b[0m: Task.__init__() missing 1 required positional argument: 'definition'" + "name": "stdout", + "output_type": "stream", + "text": [ + 
"/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n" ] } ], @@ -219,13 +222,13 @@ "\n", "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", "# by splitting the \"input\" input field over all files in the directory\n", - "mrgrid = MrGrid(voxel=(0.5,0.5,0.5)).split(input=nifti_dir.iterdir())\n", + "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", "\n", "# Run the task to resample all NIfTI files\n", "outputs = mrgrid()\n", "\n", "# Print the locations of the output files\n", - "print(\"\\n\".join(str(p) for p in outputs.output))" + "print(\"\\n\".join(str(p) for p in outputs.out_file))" ] }, { @@ -245,15 +248,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n" + ] + } + ], "source": [ "\n", "\n", - "mrgrid_varying_vox_sizes = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", + "mrgrid_varying_vox_sizes = MrGrid(operation=\"regrid\").split(\n", + " 
(\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", " # Define a list of voxel sizes to resample the NIfTI files to,\n", " # the list must be the same length as the list of NIfTI files\n", " voxel=[\n", @@ -270,7 +290,7 @@ " ],\n", ")\n", "\n", - "print(mrgrid_varying_vox_sizes().output)" + "print(\"\\n\".join(str(p) for p in outputs.out_file))" ] }, { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 2da864d279..cbcf155a6b 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,15 +1,12 @@ from pathlib import Path from tempfile import mkdtemp -import shutil from fileformats.medimage import Nifti1 from pydra.tasks.mrtrix3.v3_0 import MrGrid -from pydra.utils import user_cache_dir if __name__ == "__main__": - test_dir = Path(mkdtemp()) - shutil.rmtree(user_cache_dir / "run-cache", ignore_errors=True) + test_dir = Path(mkdtemp()) nifti_dir = test_dir / "nifti" nifti_dir.mkdir() @@ -19,14 +16,23 @@ nifti_dir, seed=i ) # Create a dummy NIfTI file in the dest. directory - # Instantiate the task definition, "splitting" over all NIfTI files in the test directory - # by splitting the "input" input field over all files in the directory - mrgrid = MrGrid(operation="regrid", voxel=(0.5, 0.5, 0.5)).split( - in_file=nifti_dir.iterdir() + mrgrid_varying_vox_sizes = MrGrid(operation="regrid").split( + ("in_file", "voxel"), + in_file=nifti_dir.iterdir(), + # Define a list of voxel sizes to resample the NIfTI files to, + # the list must be the same length as the list of NIfTI files + voxel=[ + (1.0, 1.0, 1.0), + (1.0, 1.0, 1.0), + (1.0, 1.0, 1.0), + (0.5, 0.5, 0.5), + (0.75, 0.75, 0.75), + (0.5, 0.5, 0.5), + (0.5, 0.5, 0.5), + (1.0, 1.0, 1.0), + (1.25, 1.25, 1.25), + (1.25, 1.25, 1.25), + ], ) - # Run the task to resample all NIfTI files - outputs = mrgrid(worker="cf") - - # Print the locations of the output files - print("\n".join(str(p) for p in outputs.out_file)) +print(mrgrid_varying_vox_sizes().out_file) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 949c4373a3..2593f0205b 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -128,7 +128,7 @@ def __init__( self.definition = copy(definition) # We save the submitter is the definition is a workflow otherwise we don't # so the task can be pickled - self.submitter = submitter if is_workflow(definition) else None + self.submitter = submitter self.environment = environment if environment is not None else Native() self.name = name self.state_index = state_index diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 62e7b6c6db..f088fbbda1 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -237,7 +237,7 @@ def split( "set 'overwrite=True' to do so" ) if splitter: - unwraped_split = hlpst.unwrap_splitter(splitter) + unwraped_split = list(hlpst.unwrap_splitter(splitter)) if duplicated := [f for f, c in Counter(unwraped_split).items() if c > 1]: raise ValueError(f"Splitter fields {duplicated} are duplicated") split_names = set( @@ -584,17 +584,17 @@ def _run(self, task: "Task[PythonDef]") -> None: # Run the actual function returned = self.function(**inputs) # Collect the outputs and save them into the task.return_values dictionary - self.return_values = {f.name: f.default for f in attrs.fields(self.Outputs)} - return_names = list(self.return_values) + task.return_values = {f.name: f.default for f in attrs.fields(self.Outputs)} + return_names = list(task.return_values) if returned is None: - self.return_values = {nm: None for nm in 
return_names} - elif len(self.return_values) == 1: + task.return_values = {nm: None for nm in return_names} + elif len(task.return_values) == 1: # if only one element in the fields, everything should be returned together - self.return_values = {list(self.return_values)[0]: returned} + task.return_values = {list(task.return_values)[0]: returned} elif isinstance(returned, tuple) and len(return_names) == len(returned): - self.return_values = dict(zip(return_names, returned)) + task.return_values = dict(zip(return_names, returned)) elif isinstance(returned, dict): - self.return_values = {key: returned.get(key, None) for key in return_names} + task.return_values = {key: returned.get(key, None) for key in return_names} else: raise RuntimeError( f"expected {len(return_names)} elements, but {returned} were returned" From c3a144eeb7671bb49e41351b9bd376849b8451da Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 Jan 2025 11:10:33 +1100 Subject: [PATCH 143/342] debugging workflow execution internals --- .../tutorial/2-advanced-execution.ipynb | 140 +++++++++--------- new-docs/source/tutorial/tst.py | 40 ++--- pydra/design/base.py | 10 +- pydra/design/python.py | 5 +- pydra/design/shell.py | 6 +- pydra/design/tests/test_shell.py | 88 ++++++----- pydra/engine/core.py | 20 ++- pydra/engine/lazy.py | 29 ++-- pydra/engine/specs.py | 59 ++++++-- pydra/engine/submitter.py | 48 +++--- pydra/engine/tests/test_dockertask.py | 30 ++-- pydra/engine/tests/test_environments.py | 4 +- pydra/utils/hash.py | 27 +++- 13 files changed, 281 insertions(+), 225 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 5cdeaddc60..068546252c 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -14,6 +14,16 @@ "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -22,41 +32,68 @@ "\n", "Pydra supports several workers with which to execute tasks\n", "\n", - "- `ConcurrentFutures`\n", - "- `SLURM`\n", - "- `SGE`\n", - "- `Dask` (experimental)\n", - "- `Serial` (for debugging)\n", + "- `cf`\n", + "- `slurm`\n", + "- `sge`\n", + "- `dask` (experimental)\n", + "- `debug`\n", "\n", - "By default, the *ConcurrentFutures* worker (abbreviated to \"cf\") will be used, which\n", - "divides tasks across multiple processes. If you are using a high-performance cluster (HPC)\n", + "By default, the *cf* (*ConcurrentFutures*) worker is used, which\n", + "executes tasks across multiple processes. If you are using a high-performance cluster (HPC)\n", "then the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) workers can be used to submit each\n", - "workflow node as separate jobs to the HPC scheduler. When using a graphical debugger to\n", - "debug workflow or Python tasks, the *Serial* worker is recommended. There is also an\n", + "workflow node as separate jobs to the HPC scheduler. 
There is also an\n", "experimental [Dask](https://www.dask.org/) worker.\n", "\n", + "When using a debugger in the development of a workflow or Python tasks, the\n", + "*debug* worker is recommended as it executes nodes \"synchronously\" (as opposed to\n", + "asynchronously), and can therefore break on uncaught exceptions.\n", + "\n", "To specify a worker, the abbreviation can be passed either as a string or using the\n", - "class itself" + "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", + "arguments to the execution call. For example, if we wanted to run five tasks using the\n", + "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", + "call." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "'typing.TypeVar' object is not subscriptable", + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev141+g03c7438b.d20250123\n" + ] + }, + { + "ename": "IndentationError", + "evalue": "unexpected indent (, line 1)", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdesign\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m python\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;129;43m@python\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefine\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;43;01mdef\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43mPower10\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mn\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m>\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m:\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mreturn\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mn\u001b[49m\n\u001b[1;32m 7\u001b[0m pow10 \u001b[38;5;241m=\u001b[39m Power10()\u001b[38;5;241m.\u001b[39msplit(n\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m])\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/design/python.py:124\u001b[0m, in \u001b[0;36mdefine\u001b[0;34m(wrapped, inputs, outputs, bases, outputs_bases, auto_attribs)\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;129m@dataclass_transform\u001b[39m(\n\u001b[1;32m 93\u001b[0m kw_only_default\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 94\u001b[0m field_specifiers\u001b[38;5;241m=\u001b[39m(arg,),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 103\u001b[0m auto_attribs: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 104\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPythonDef\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 105\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m Create an interface for a function or a class.\u001b[39;00m\n\u001b[1;32m 107\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03m The task definition class for the Python function\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 124\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mspecs\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PythonDef, PythonOutputs\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmake\u001b[39m(wrapped: ty\u001b[38;5;241m.\u001b[39mCallable \u001b[38;5;241m|\u001b[39m \u001b[38;5;28mtype\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m PythonDef:\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inspect\u001b[38;5;241m.\u001b[39misclass(wrapped):\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:1020\u001b[0m\n\u001b[1;32m 1010\u001b[0m argstr_formatted \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1011\u001b[0m argstr_formatted\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[ \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m[\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1012\u001b[0m \u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m ]\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1015\u001b[0m \u001b[38;5;241m.\u001b[39mstrip()\n\u001b[1;32m 1016\u001b[0m )\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m argstr_formatted\n\u001b[0;32m-> 1020\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m core \u001b[38;5;66;03m# noqa: E402\u001b[39;00m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:60\u001b[0m\n\u001b[1;32m 56\u001b[0m DefType \u001b[38;5;241m=\u001b[39m ty\u001b[38;5;241m.\u001b[39mTypeVar(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDefType\u001b[39m\u001b[38;5;124m\"\u001b[39m, bound\u001b[38;5;241m=\u001b[39mTaskDef)\n\u001b[1;32m 57\u001b[0m OutputsType \u001b[38;5;241m=\u001b[39m ty\u001b[38;5;241m.\u001b[39mTypeVar(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOutputType\u001b[39m\u001b[38;5;124m\"\u001b[39m, bound\u001b[38;5;241m=\u001b[39mTaskOutputs)\n\u001b[0;32m---> 60\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mTask\u001b[39;00m(ty\u001b[38;5;241m.\u001b[39mGeneric[\u001b[43mDefType\u001b[49m\u001b[43m[\u001b[49m\u001b[43mOutputsType\u001b[49m\u001b[43m]\u001b[49m]):\n\u001b[1;32m 61\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;124;03m A base structure for the nodes in the processing graph.\u001b[39;00m\n\u001b[1;32m 63\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 66\u001b[0m \n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m _api_version: \u001b[38;5;28mstr\u001b[39m 
\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m0.0.1\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;66;03m# Should generally not be touched by subclasses\u001b[39;00m\n", - "\u001b[0;31mTypeError\u001b[0m: 'typing.TypeVar' object is not subscriptable" + "Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n", + "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/IPython/core/interactiveshell.py:3577\u001b[0m in \u001b[1;35mrun_code\u001b[0m\n exec(code_obj, self.user_global_ns, self.user_ns)\u001b[0m\n", + "\u001b[0m Cell \u001b[1;32mIn[2], line 10\u001b[0m\n outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:193\u001b[0m in \u001b[1;35m__call__\u001b[0m\n result = sub(self)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/submitter.py:157\u001b[0m in \u001b[1;35m__call__\u001b[0m\n self.loop.run_until_complete(task.run_async(rerun=self.rerun))\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m in \u001b[1;35mrun_until_complete\u001b[0m\n return f.result()\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m in \u001b[1;35mresult\u001b[0m\n raise self._exception.with_traceback(self._exception_tb)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m in \u001b[1;35m__step_run_and_handle_result\u001b[0m\n result = coro.send(None)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:398\u001b[0m in \u001b[1;35mrun_async\u001b[0m\n \"'%s' is attempting to acquire lock on %s\", self.name, self.lockfile\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:212\u001b[0m in \u001b[1;35mlockfile\u001b[0m\n return self.output_dir.with_suffix(\".lock\")\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:261\u001b[0m in \u001b[1;35moutput_dir\u001b[0m\n return self.cache_dir / self.checksum\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:206\u001b[0m in \u001b[1;35mchecksum\u001b[0m\n input_hash = self.definition._hash\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:361\u001b[0m in \u001b[1;35m_hash\u001b[0m\n hsh, self._hashes = self._compute_hashes()\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:384\u001b[0m in \u001b[1;35m_compute_hashes\u001b[0m\n k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items()\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:206\u001b[0m in \u001b[1;35mhash_function\u001b[0m\n return hash_object(obj, **kwargs).hex()\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:237\u001b[0m in \u001b[1;35mhash_object\u001b[0m\n raise e\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:225\u001b[0m in \u001b[1;35mhash_object\u001b[0m\n return hash_single(obj, cache)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:281\u001b[0m in \u001b[1;35mhash_single\u001b[0m\n first = next(bytes_it)\u001b[0m\n", + "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:537\u001b[0m in \u001b[1;35mbytes_repr_function\u001b[0m\n 
src_ast = ast.parse(src)\u001b[0m\n", + "\u001b[0;36m File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/ast.py:52\u001b[0;36m in \u001b[0;35mparse\u001b[0;36m\n\u001b[0;31m return compile(source, filename, mode, flags,\u001b[0;36m\n", + "\u001b[0;36m File \u001b[0;32m:1\u001b[0;36m\u001b[0m\n\u001b[0;31m @workflow.define(outputs=output_types)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unexpected indent\nand therefore cannot hash `.Split at 0x11443cd60>` of type `builtins.function`. Consider implementing a specific `bytes_repr()`(see pydra.utils.hash.register_serializer) or a `__bytes_repr__()` dunder methods for this type\n" ] } ], @@ -64,12 +101,17 @@ "from pydra.design import python\n", "\n", "@python.define\n", - "def Power10(n: int) -> int:\n", - " return 10 ** n\n", + "def TenToThePower(p: int) -> int:\n", + " return 10 ** p\n", + "\n", + "ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", + "\n", + "# Run the 5 tasks in parallel split across 3 processes\n", + "outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", "\n", - "pow10 = Power10().split(n=[1, 2, 3, 4, 5])\n", + "p1, p2, p3, p4, p5 = outputs.out\n", "\n", - "p1, p2, p3, p4, p5 = pow10(worker=\"serial\")" + "print(f\"10^5 = {p5}\")" ] }, { @@ -90,23 +132,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Nifti\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Submitter\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Make directory filled with nifti files\u001b[39;00m\n\u001b[1;32m 8\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m Path(tempfile\u001b[38;5;241m.\u001b[39mmkdtemp())\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Auto-generated, do not edit\u001b[39;00m\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2gmwmi_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Gmwmi\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivett2vis_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Fivett2Vis\n\u001b[1;32m 5\u001b[0m 
\u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfivettcheck_\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FivettCheck\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/pydra/tasks/mrtrix3/v3_0/fivett2gmwmi_.py:7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgeneric\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m File, Directory \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfileformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmedimage_mrtrix3\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ImageIn, ImageOut, Tracks \u001b[38;5;66;03m# noqa: F401\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m specs, ShellCommandTask\n\u001b[1;32m 10\u001b[0m input_fields \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Arguments\u001b[39;00m\n\u001b[1;32m 12\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 104\u001b[0m ),\n\u001b[1;32m 105\u001b[0m ]\n\u001b[1;32m 107\u001b[0m Fivett2GmwmiInputSpec \u001b[38;5;241m=\u001b[39m specs\u001b[38;5;241m.\u001b[39mSpecInfo(\n\u001b[1;32m 108\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFivett2GmwmiInput\u001b[39m\u001b[38;5;124m\"\u001b[39m, fields\u001b[38;5;241m=\u001b[39minput_fields, bases\u001b[38;5;241m=\u001b[39m(specs\u001b[38;5;241m.\u001b[39mShellSpec,)\n\u001b[1;32m 109\u001b[0m )\n", - "\u001b[0;31mImportError\u001b[0m: cannot import name 'ShellCommandTask' from 'pydra.engine' (/Users/tclose/git/workflows/pydra/pydra/engine/__init__.py)" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "import tempfile\n", @@ -150,21 +178,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'MrGrid' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mrgrid_varying_vox_sizes2 \u001b[38;5;241m=\u001b[39m \u001b[43mMrGrid\u001b[49m()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 2\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 4\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m submitter \u001b[38;5;241m=\u001b[39m Submitter(cache_dir\u001b[38;5;241m=\u001b[39mtest_dir \u001b[38;5;241m/\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Result from previous run is reused as the task and inputs are identical\u001b[39;00m\n", - "\u001b[0;31mNameError\u001b[0m: name 'MrGrid' is not defined" - ] - } - ], + "outputs": [], "source": [ 
"mrgrid_varying_vox_sizes2 = MrGrid().split(\n", " (\"input\", \"voxel\"),\n", @@ -207,21 +223,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'nifti_dir' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Rename a NIfTI file within the test directory\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m first_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mnext\u001b[39m(\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 3\u001b[0m first_file\u001b[38;5;241m.\u001b[39mrename(first_file\u001b[38;5;241m.\u001b[39mwith_name(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfirst.nii.gz\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 5\u001b[0m mrgrid_varying_vox_sizes3 \u001b[38;5;241m=\u001b[39m MrGrid()\u001b[38;5;241m.\u001b[39msplit(\n\u001b[1;32m 6\u001b[0m (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mvoxel\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39mnifti_dir\u001b[38;5;241m.\u001b[39miterdir(),\n\u001b[1;32m 8\u001b[0m voxel\u001b[38;5;241m=\u001b[39mVOXEL_SIZES\n\u001b[1;32m 9\u001b[0m )\n", - "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" - ] - } - ], + "outputs": [], "source": [ "# Rename a NIfTI file within the test directory\n", "first_file = next(nifti_dir.iterdir())\n", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index cbcf155a6b..b563e236af 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,38 +1,16 @@ -from pathlib import Path -from tempfile import mkdtemp -from fileformats.medimage import Nifti1 -from pydra.tasks.mrtrix3.v3_0 import MrGrid +from pydra.design import python -if __name__ == "__main__": +@python.define +def TenToThePower(p: int) -> int: + return 10**p - test_dir = Path(mkdtemp()) - nifti_dir = test_dir / "nifti" - nifti_dir.mkdir() +ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) - for i in range(10): - Nifti1.sample( - nifti_dir, seed=i - ) # Create a dummy NIfTI file in the dest. 
directory +# Run the 5 tasks in parallel split across 3 processes +outputs = ten_to_the_power(worker="cf", n_procs=3) - mrgrid_varying_vox_sizes = MrGrid(operation="regrid").split( - ("in_file", "voxel"), - in_file=nifti_dir.iterdir(), - # Define a list of voxel sizes to resample the NIfTI files to, - # the list must be the same length as the list of NIfTI files - voxel=[ - (1.0, 1.0, 1.0), - (1.0, 1.0, 1.0), - (1.0, 1.0, 1.0), - (0.5, 0.5, 0.5), - (0.75, 0.75, 0.75), - (0.5, 0.5, 0.5), - (0.5, 0.5, 0.5), - (1.0, 1.0, 1.0), - (1.25, 1.25, 1.25), - (1.25, 1.25, 1.25), - ], - ) +p1, p2, p3, p4, p5 = outputs.out -print(mrgrid_varying_vox_sizes().out_file) +print(f"10^5 = {p5}") diff --git a/pydra/design/base.py b/pydra/design/base.py index 3ead73933c..f5dd8b7507 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -240,7 +240,7 @@ class Arg(Field): readonly: bool = False -@attrs.define(kw_only=True) +@attrs.define(kw_only=True, slots=False) class Out(Field): """Base class for output fields of task definitions @@ -261,9 +261,11 @@ class Out(Field): The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional The validator(s) for the field passed through to the attrs.field, by default it is None + order : int + The order of the output in the output list, allows for tuple unpacking of outputs """ - pass + order: int = attrs.field(default=None) def extract_fields_from_class( @@ -392,6 +394,10 @@ def make_task_def( spec_type._check_arg_refs(inputs, outputs) + # Set positions for outputs to allow for tuple unpacking + for i, output in enumerate(outputs.values()): + output.order = i + if name is None and klass is not None: name = klass.__name__ if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: diff --git a/pydra/design/python.py b/pydra/design/python.py index 433a189d65..197e357238 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -75,9 +75,12 @@ class out(Out): The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional The validator(s) for the field passed through to the attrs.field, by default it is None + position : int + The position of the output in the output list, allows for tuple unpacking of + outputs """ - pass + position: int = attrs.field(default=None) @dataclass_transform( diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 90aa168e2d..6c8889b990 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -124,6 +124,8 @@ class out(Out): to the function), output_dir (task output_dir will be used), stdout, stderr (stdout and stderr of the task will be sent) inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). + order : int + The order of the output in the output list, allows for tuple unpacking of outputs """ callable: ty.Callable | None = None @@ -135,7 +137,7 @@ def __attrs_post_init__(self): @attrs.define(kw_only=True) -class outarg(Out, arg): +class outarg(arg, Out): """An input field that specifies where to save the output file Parameters @@ -174,7 +176,7 @@ class outarg(Out, arg): the flag is used before every element if a list is provided as a value. If no argstr is used the field is not part of the command. position: int, optional - Position of the field in the command, could be nonnegative or negative integer. + Position of the field in the command line, could be nonnegative or negative integer. 
If nothing is provided the field will be inserted between all fields with nonnegative positions and fields with negative positions. sep: str, optional diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 316c7db6f3..bcbab495aa 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -35,10 +35,10 @@ def test_interface_template(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg(name="in_path", type=FsObject, position=1), + shell.arg(name="in_path", type=FsObject, argpos=1), output, ] assert sorted_fields(Cp.Outputs) == [ @@ -82,10 +82,10 @@ def test_interface_template_w_types_and_path_template_ext(): validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg(name="in_image", type=image.Png, position=1), + shell.arg(name="in_image", type=image.Png, argpos=1), output, ] assert sorted_fields(TrimPng.Outputs) == [ @@ -122,12 +122,10 @@ def test_interface_template_w_modify(): validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg( - name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy - ), + shell.arg(name="image", type=image.Png, argpos=1, copy_mode=File.CopyMode.copy), ] assert sorted_fields(TrimPng.Outputs) == [ shell.out( @@ -180,34 +178,34 @@ def test_interface_template_more_complex(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg(name="recursive", argstr="-R", type=bool, default=False, argpos=3), shell.arg( name="text_arg", argstr="--text-arg", type=str | None, default=None, - position=4, + argpos=4, ), shell.arg( name="int_arg", argstr="--int-arg", type=int | None, default=None, - position=5, + argpos=5, ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str] | None, default=None, - position=6, + argpos=6, ), ] assert sorted_fields(Cp.Outputs) == [ @@ -278,11 +276,11 @@ def test_interface_template_with_overrides_and_optionals(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " ), shell.arg( name="recursive", @@ -290,21 +288,21 @@ def test_interface_template_with_overrides_and_optionals(): type=bool, default=False, help=RECURSIVE_HELP, - position=2, + argpos=2, ), - shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, argpos=3), shell.arg( name="int_arg", argstr="--int-arg", type=int | None, default=None, - position=4, + argpos=4, ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], - position=5, + argpos=5, ), ] + outargs @@ -353,24 +351,24 @@ def test_interface_template_with_defaults(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], 
- position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), + shell.arg(name="recursive", argstr="-R", type=bool, default=True, argpos=3), shell.arg( - name="text_arg", argstr="--text-arg", type=str, position=4, default="foo" + name="text_arg", argstr="--text-arg", type=str, argpos=4, default="foo" ), - shell.arg(name="int_arg", argstr="--int-arg", type=int, position=5, default=99), + shell.arg(name="int_arg", argstr="--int-arg", type=int, argpos=5, default=99), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], default=(1, "bar"), - position=6, + argpos=6, ), ] assert sorted_fields(Cp.Outputs) == [ @@ -421,26 +419,26 @@ def test_interface_template_with_type_overrides(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), - shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), + shell.arg(name="recursive", argstr="-R", type=bool, default=False, argpos=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, argpos=4), shell.arg( name="int_arg", argstr="--int-arg", type=int | None, - position=5, + argpos=5, ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], - position=6, + argpos=6, ), ] assert sorted_fields(Cp.Outputs) == [ @@ -474,7 +472,7 @@ class Ls(ShellDef["Ls.Outputs"]): directory: Directory = shell.arg( help="the directory to list the contents of", argstr="", - position=-1, + argpos=-1, ) hidden: bool = shell.arg( help=("display hidden FS objects"), @@ -525,7 +523,7 @@ class Outputs(ShellOutputs): type=Directory, help="the directory to list the contents of", argstr="", - position=-1, + argpos=-1, ), "hidden": shell.arg( type=bool, @@ -649,7 +647,7 @@ class A: executable = "cp" - x: File = shell.arg(argstr="", position=1) + x: File = shell.arg(argstr="", argpos=1) class Outputs: """The outputs of the example shell interface @@ -669,7 +667,7 @@ class Outputs: type=File, help="an input file", argstr="", - position=1, + argpos=1, ), }, outputs={ @@ -699,7 +697,7 @@ class A: executable = "cp" - x: File = shell.arg(help="an input file", argstr="", position=1) + x: File = shell.arg(help="an input file", argstr="", argpos=1) class Outputs: y: File = shell.outarg( @@ -731,7 +729,7 @@ class Outputs: default="cp", type=str | ty.Sequence[str], argstr="", - position=0, + argpos=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( @@ -739,7 +737,7 @@ class Outputs: type=File, help="an input file", argstr="", - position=1, + argpos=1, ), output, ] @@ -772,7 +770,7 @@ def test_shell_output_field_name_dynamic(): type=File, help="an input file", argstr="", - position=1, + argpos=1, ), }, outputs={ @@ -796,7 +794,7 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmp_path): B = shell.define( name="B", - inputs={"y": shell.arg(type=File, help="output file", argstr="", position=-1)}, + inputs={"y": shell.arg(type=File, help="output file", argstr="", argpos=-1)}, outputs={ "out_file_size": { "type": int, @@ -863,7 +861,7 @@ def 
test_shell_inputs_outputs_bases_dynamic(tmp_path): type=Directory, help="input directory", argstr="", - position=-1, + argpos=-1, ) }, outputs={ @@ -904,7 +902,7 @@ def test_shell_inputs_outputs_bases_static(tmp_path): class A: executable = "ls" - directory: Directory = shell.arg(help="input directory", argstr="", position=-1) + directory: Directory = shell.arg(help="input directory", argstr="", argpos=-1) class Outputs: entries: list = shell.out( @@ -937,7 +935,7 @@ def test_shell_missing_executable_static(): @shell.define class A: directory: Directory = shell.arg( - help="input directory", argstr="", position=-1 + help="input directory", argstr="", argpos=-1 ) class Outputs: @@ -959,7 +957,7 @@ def test_shell_missing_executable_dynamic(): type=Directory, help="input directory", argstr="", - position=-1, + argpos=-1, ), }, outputs={ diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 2593f0205b..ca3e35f41b 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -124,8 +124,10 @@ def __init__( if state_index is None: state_index = state.StateIndex() - # Copy the definition, so lazy fields can be resolved and replaced at runtime - self.definition = copy(definition) + # Check that the definition is fully resolved and ready to run + definition._check_resolved() + definition._check_rules() + self.definition = definition # We save the submitter is the definition is a workflow otherwise we don't # so the task can be pickled self.submitter = submitter @@ -338,8 +340,6 @@ def run(self, rerun: bool = False): # run_async to use common helper methods for pre/post run tasks # checking if the definition is fully resolved and ready to run - self.definition._check_resolved() - self.definition._check_rules() self.hooks.pre_run(self) logger.debug( "'%s' is attempting to acquire lock on %s", self.name, self.lockfile @@ -391,8 +391,6 @@ async def run_async(self, rerun: bool = False): propagated to all tasks within workflow tasks. """ # checking if the definition is fully resolved and ready to run - self.definition._check_resolved() - self.definition._check_rules() self.hooks.pre_run(self) logger.debug( "'%s' is attempting to acquire lock on %s", self.name, self.lockfile @@ -732,7 +730,15 @@ def under_construction(cls) -> "Workflow[ty.Any]": def execution_graph(self, submitter: "Submitter") -> DiGraph: from pydra.engine.submitter import NodeExecution - return self._create_graph([NodeExecution(n, submitter) for n in self.nodes]) + exec_nodes = [ + NodeExecution(n, submitter, workflow_inputs=self.inputs) for n in self.nodes + ] + graph = self._create_graph(exec_nodes) + # Set the graph attribute of the nodes so lazy fields can be resolved as tasks + # are created + for node in exec_nodes: + node.graph = graph + return graph @property def graph(self) -> DiGraph: diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index c938833c8f..9f5538d19c 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -10,7 +10,8 @@ from .graph import DiGraph from .submitter import NodeExecution from .core import Task, Workflow - from .specs import TaskDef + from .specs import TaskDef, WorkflowDef + from .state import StateIndex T = ty.TypeVar("T") @@ -65,7 +66,10 @@ def __eq__(self, other): def source(self): return self.workflow - def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty.Any: + def get_value( + self, + workflow_def: "WorkflowDef", + ) -> ty.Any: """Return the value of a lazy field. 
Parameters @@ -80,20 +84,7 @@ def get_value(self, wf: "Workflow", state_index: ty.Optional[int] = None) -> ty. value : Any the resolved value of the lazy-field """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel - - value = getattr(wf.inputs, self.field) - if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: - _, split_depth = TypeParser.strip_splits(self.type) - - def apply_splits(obj, depth): - if depth < 1: - return obj - return StateArray[self.type](apply_splits(i, depth - 1) for i in obj) - - value = apply_splits(value, split_depth) + value = workflow_def[self.field] value = self._apply_cast(value) return value @@ -129,7 +120,9 @@ def name(self) -> str: return self.node.name def get_value( - self, graph: "DiGraph[NodeExecution]", state_index: ty.Optional[int] = None + self, + graph: "DiGraph[NodeExecution]", + state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. @@ -184,10 +177,10 @@ def get_nested(task: "Task[DefType]", depth: int): "your interface inputs." ) val = res.get_output_field(self.field) + val = self._apply_cast(val) return val value = get_nested(task, depth=split_depth) - value = self._apply_cast(value) return value @property diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index f088fbbda1..95f151fd5f 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -4,6 +4,7 @@ import re from copy import copy import os +from operator import attrgetter import inspect import itertools import platform @@ -34,13 +35,14 @@ from pydra.utils.typing import StateArray from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY from pydra.design import shell +from pydra.engine.lazy import LazyInField, LazyOutField if ty.TYPE_CHECKING: from pydra.engine.core import Task from pydra.engine.graph import DiGraph from pydra.engine.submitter import NodeExecution - from pydra.engine.lazy import LazyOutField from pydra.engine.core import Workflow + from pydra.engine.state import StateIndex from pydra.engine.environments import Environment from pydra.engine.workers import Worker @@ -85,7 +87,7 @@ def _get_node(self): f"{self} outputs object is not a lazy output of a workflow node" ) from None - def __getitem__(self, name: str) -> ty.Any: + def __getitem__(self, name_or_index: str | int) -> ty.Any: """Return the value for the given attribute Parameters @@ -98,10 +100,20 @@ def __getitem__(self, name: str) -> ty.Any: Any the value of the attribute """ + if isinstance(name_or_index, int): + return list(self)[name_or_index] try: - return getattr(self, name) + return getattr(self, name_or_index) except AttributeError: - raise KeyError(f"{self} doesn't have an attribute {name}") from None + raise KeyError( + f"{self} doesn't have an attribute {name_or_index}" + ) from None + + def __iter__(self) -> ty.Generator[ty.Any, None, None]: + """Iterate through all the values in the definition, allows for tuple unpacking""" + fields = sorted(attrs_fields(self), key=attrgetter("order")) + for field in fields: + yield getattr(self, field.name) OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) @@ -374,15 +386,36 @@ def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: } return hash_function(sorted(field_hashes.items())), field_hashes - def _resolve_lazy_fields(self, wf, state_index=None): - """Parse output results.""" - temp_values = {} - for field in attrs_fields(self): - value: "LazyOutField" = getattr(self, field.name) - if is_lazy(value): - 
temp_values[field.name] = value.get_value(wf, state_index=state_index) - for field, val in temp_values.items(): - setattr(self, field, val) + def _resolve_lazy_inputs( + self, + workflow_inputs: "WorkflowDef", + graph: "DiGraph[NodeExecution]", + state_index: "StateIndex | None" = None, + ) -> Self: + """Resolves lazy fields in the task definition by replacing them with their + actual values. + + Parameters + ---------- + workflow : Workflow + The workflow the task is part of + graph : DiGraph[NodeExecution] + The execution graph of the workflow + state_index : StateIndex, optional + The state index for the workflow, by default None + + Returns + ------- + Self + The task definition with all lazy fields resolved + """ + resolved = {} + for name, value in attrs_values(self).items(): + if isinstance(value, LazyInField): + resolved[name] = value.get_value(workflow_inputs) + elif isinstance(value, LazyOutField): + resolved[name] = value.get_value(graph, state_index) + return attrs.evolve(self, **resolved) def _check_rules(self): """Check if all rules are satisfied.""" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index c059a20ace..72922983fb 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -136,19 +136,16 @@ def __call__( if task_def._splitter: from pydra.design import workflow + from pydra.engine.specs import TaskDef output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} - # We need to use a new variable as task_def will be overwritten by the time - # the Split workflow constructor is called - node_def = task_def - @workflow.define(outputs=output_types) - def Split(): - node = workflow.add(node_def) + def Split(defn: TaskDef) -> tuple: + node = workflow.add(defn) return tuple(getattr(node, o) for o in output_types) - task_def = Split() + task_def = Split(defn=task_def) elif task_def._combiner: raise ValueError( @@ -206,10 +203,6 @@ def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: tasks = self.get_runnable_tasks(exec_graph) while tasks or any(not n.done for n in exec_graph.nodes): for task in tasks: - # grab inputs if needed - logger.debug(f"Retrieving inputs for {task}") - # TODO: add state idx to retrieve values to reduce waiting - task.definition._resolve_lazy_fields(wf) self.worker.run(task, rerun=self.rerun) tasks = self.get_runnable_tasks(exec_graph) workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} @@ -292,13 +285,8 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non ) raise RuntimeError(msg) for task in tasks: - # grab inputs if needed - logger.debug(f"Retrieving inputs for {task}") - # TODO: add state idx to retrieve values to reduce waiting - task.definition._resolve_lazy_fields(wf) if is_workflow(task): - await task.run(self) - # single task + await task.run_async(rerun=self.rerun) else: task_futures.add(self.worker.run(task, rerun=self.rerun)) task_futures = await self.worker.fetch_finished(task_futures) @@ -389,7 +377,17 @@ class NodeExecution(ty.Generic[DefType]): _tasks: dict[StateIndex | None, "Task[DefType]"] | None - def __init__(self, node: "Node", submitter: Submitter): + workflow_inputs: "WorkflowDef" + + graph: DiGraph["NodeExecution"] | None + + def __init__( + self, + node: "Node", + submitter: Submitter, + workflow_inputs: "WorkflowDef", + exec_graph: DiGraph["NodeExecution"], + ): self.name = node.name self.node = node self.submitter = submitter @@ -401,6 +399,8 @@ def __init__(self, node: "Node", submitter: Submitter): 
self.running = {} self.unrunnable = defaultdict(list) self.state_names = self.node.state.names + self.workflow_inputs = workflow_inputs + self.graph = None @property def inputs(self) -> "Node.Inputs": @@ -449,14 +449,22 @@ def all_failed(self) -> bool: def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: if self.node.state is None: yield Task( - definition=self.node._definition, + definition=self.node._definition._resolve_lazy_inputs( + workflow_inputs=self.workflow_inputs, + exec_graph=self.graph, + state_index=None, + ), submitter=self.submitter, name=self.node.name, ) else: for index, split_defn in self.node._split_definition().items(): yield Task( - definition=split_defn, + definition=split_defn._resolve_lazy_inputs( + workflow_inputs=self.workflow_inputs, + exec_graph=self.graph, + state_index=index, + ), submitter=self.submitter, name=self.node.name, state_index=index, diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index a80089f5a5..94cb71b49e 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -140,7 +140,7 @@ def test_docker_inputspec_1(tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) @@ -173,7 +173,7 @@ def test_docker_inputspec_1a(tmp_path): name="file", type=File, default=filename, - position=1, + argpos=1, argstr="", help="input file", ) @@ -206,7 +206,7 @@ def test_docker_inputspec_2(plugin, tmp_path): shell.arg( name="file1", type=File, - position=1, + argpos=1, argstr="", help="input file 1", ), @@ -214,7 +214,7 @@ def test_docker_inputspec_2(plugin, tmp_path): name="file2", type=File, default=filename_2, - position=2, + argpos=2, argstr="", help="input file 2", ), @@ -250,14 +250,14 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): name="file1", type=File, default=filename_1, - position=1, + argpos=1, argstr="", help="input file 1", ), shell.arg( name="file2", type=File, - position=2, + argpos=2, argstr="", help="input file 2", ), @@ -295,14 +295,14 @@ def test_docker_inputspec_2a(plugin, tmp_path): name="file1", type=File, default=filename_1, - position=1, + argpos=1, argstr="", help="input file 1", ), shell.arg( name="file2", type=File, - position=2, + argpos=2, argstr="", help="input file 2", ), @@ -332,7 +332,7 @@ def test_docker_inputspec_3(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", container_path=True, @@ -368,7 +368,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): shell.arg( name="orig_file", type=File, - position=1, + argpos=1, argstr="", help="orig file", copyfile="copy", @@ -418,7 +418,7 @@ def test_docker_inputspec_state_1(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) @@ -454,7 +454,7 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) @@ -483,7 +483,7 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) @@ -525,7 +525,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) @@ -569,7 +569,7 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - position=1, + argpos=1, argstr="", help="input file", ) diff --git 
a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 64ba2831f8..3203d30272 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -177,7 +177,7 @@ def create_shelly_inputfile(tempdir, filename, name, executable): shell.arg( name="file", type=File, - position=1, + argpos=1, help="files", argstr="", ) @@ -352,7 +352,7 @@ def create_shelly_outputfile(tempdir, filename, name, executable="cp"): shell.arg( name="file_orig", type=File, - position=2, + argpos=2, help="new file", argstr="", ), diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index de2b94a9f0..224af25fba 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -4,9 +4,12 @@ import os import struct import inspect +import re from datetime import datetime import typing as ty import types +import ast +import cloudpickle as cp from pathlib import Path from collections.abc import Mapping from functools import singledispatch @@ -26,6 +29,8 @@ logger = logging.getLogger("pydra") +FUNCTION_SRC_CHUNK_LEN_DEFAULT = 8192 + try: from typing import Protocol except ImportError: @@ -522,8 +527,28 @@ def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: @register_serializer def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: + """Serialize a function, attempting to use the AST of the source code if available + otherwise falling back to using cloudpickle to serialize the byte-code of the + function.""" + try: + src = inspect.getsource(obj) + except OSError: + # Fallback to using cloudpickle to serialize the function if the source + # code is not available + bytes_repr = cp.dumps(obj) + else: + indent = re.match(r"(\s*)", src).group(1) + if indent: + src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) + src_ast = ast.parse(src) + # Remove the function definition from the source code + bytes_repr = ast.dump( + src_ast, annotate_fields=False, include_attributes=False + ).encode() + yield b"function:(" - yield hash_single(inspect.getsource(obj), cache) + for i in range(0, len(bytes_repr), FUNCTION_SRC_CHUNK_LEN_DEFAULT): + yield hash_single(bytes_repr[i : i + FUNCTION_SRC_CHUNK_LEN_DEFAULT], cache) yield b")" From 6a590e9db7e564bba923eccfc688862fee5fbfb9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 Jan 2025 11:34:07 +1100 Subject: [PATCH 144/342] moved resolution of lazy fields to point where tasks are initialised --- new-docs/source/tutorial/tst.py | 2 +- pydra/engine/core.py | 14 +++++++++++--- pydra/engine/specs.py | 4 ++-- pydra/engine/submitter.py | 3 +-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index b563e236af..18f70c7d68 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -9,7 +9,7 @@ def TenToThePower(p: int) -> int: ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) # Run the 5 tasks in parallel split across 3 processes -outputs = ten_to_the_power(worker="cf", n_procs=3) +outputs = ten_to_the_power(worker="debug") p1, p2, p3, p4, p5 = outputs.out diff --git a/pydra/engine/core.py b/pydra/engine/core.py index ca3e35f41b..bf205d27ac 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -206,7 +206,7 @@ def checksum(self): if self._checksum is not None: return self._checksum input_hash = self.definition._hash - self._checksum = create_checksum(self.__class__.__name__, input_hash) + self._checksum = create_checksum(self.definition.__class__.__name__, input_hash) 
return self._checksum @property @@ -352,7 +352,9 @@ def run(self, rerun: bool = False): cwd = os.getcwd() self._populate_filesystem() os.chdir(self.output_dir) - result = Result(outputs=None, runtime=None, errored=False, task=self) + result = Result( + outputs=None, runtime=None, errored=False, output_dir=self.output_dir + ) self.hooks.pre_run_task(self) self.audit.start_audit(odir=self.output_dir) if self.audit.audit_check(AuditFlag.PROV): @@ -402,7 +404,9 @@ async def run_async(self, rerun: bool = False): return result cwd = os.getcwd() self._populate_filesystem() - result = Result(outputs=None, runtime=None, errored=False, task=self) + result = Result( + outputs=None, runtime=None, errored=False, output_dir=self.output_dir + ) self.hooks.pre_run_task(self) self.audit.start_audit(odir=self.output_dir) try: @@ -549,6 +553,10 @@ def _check_for_hash_changes(self): self.definition._hashes, ) + def _write_notebook(self): + """Writes a notebook into the""" + raise NotImplementedError + SUPPORTED_COPY_MODES = FileSet.CopyMode.any DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 95f151fd5f..3ee35c8aea 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -517,7 +517,7 @@ class Runtime: class Result(ty.Generic[OutputsType]): """Metadata regarding the outputs of processing.""" - task: "Task[DefType]" + output_dir: Path outputs: OutputsType | None = None runtime: Runtime | None = None errored: bool = False @@ -550,7 +550,7 @@ def get_output_field(self, field_name): @property def errors(self): if self.errored: - with open(self.task.output_dir / "_error.pklz", "rb") as f: + with open(self.output_dir / "_error.pklz", "rb") as f: return cp.load(f) return None diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 72922983fb..17102153a4 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -386,7 +386,6 @@ def __init__( node: "Node", submitter: Submitter, workflow_inputs: "WorkflowDef", - exec_graph: DiGraph["NodeExecution"], ): self.name = node.name self.node = node @@ -462,7 +461,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: yield Task( definition=split_defn._resolve_lazy_inputs( workflow_inputs=self.workflow_inputs, - exec_graph=self.graph, + graph=self.graph, state_index=index, ), submitter=self.submitter, From e3418b3f4f5fad637d2d96e245f5978c8ff8bb7f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 Jan 2025 16:46:57 +1100 Subject: [PATCH 145/342] got concurrent futures worker to work --- new-docs/source/tutorial/tst.py | 18 +- pydra/engine/core.py | 4 +- pydra/engine/helpers.py | 18 +- pydra/engine/specs.py | 9 +- pydra/engine/submitter.py | 11 +- pydra/engine/workers.py | 297 +++++++++++++------------------- 6 files changed, 150 insertions(+), 207 deletions(-) diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 18f70c7d68..60000fd956 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,16 +1,16 @@ from pydra.design import python +if __name__ == "__main__": -@python.define -def TenToThePower(p: int) -> int: - return 10**p + @python.define + def TenToThePower(p: int) -> int: + return 10**p + ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) -ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) + # Run the 5 tasks in parallel split across 3 processes + outputs = ten_to_the_power(worker="cf", n_procs=3) -# Run the 5 tasks in parallel split across 3 processes -outputs = 
ten_to_the_power(worker="debug") + p1, p2, p3, p4, p5 = outputs.out -p1, p2, p3, p4, p5 = outputs.out - -print(f"10^5 = {p5}") + print(f"10^5 = {p5}") diff --git a/pydra/engine/core.py b/pydra/engine/core.py index bf205d27ac..ab34be2026 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -325,6 +325,8 @@ def _populate_filesystem(self): if not self.can_resume and self.output_dir.exists(): shutil.rmtree(self.output_dir) self.output_dir.mkdir(parents=False, exist_ok=self.can_resume) + # Save task pkl into the output directory for future reference + save(self.output_dir, task=self) def run(self, rerun: bool = False): """Prepare the task working directory, execute the task definition, and save the @@ -382,7 +384,7 @@ def run(self, rerun: bool = False): self._check_for_hash_changes() return result - async def run_async(self, rerun: bool = False): + async def run_async(self, rerun: bool = False) -> Result: """Prepare the task working directory, execute the task definition asynchronously, and save the results. NB: only workflows are run asynchronously at the moment. diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index a158c5c319..4acfcc342d 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -463,7 +463,7 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: resultfile = task.output_dir / "_result.pklz" try: - task(rerun=rerun) + task.run(rerun=rerun) except Exception as e: # creating result and error files if missing errorfile = task.output_dir / "_error.pklz" @@ -479,16 +479,16 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: return resultfile -async def load_and_run_async(task_pkl): - """ - loading a task from a pickle file, settings proper input - and running the workflow - """ - task = load_task(task_pkl=task_pkl) - await task() +# async def load_and_run_async(task_pkl): +# """ +# loading a task from a pickle file, settings proper input +# and running the workflow +# """ +# task = load_task(task_pkl=task_pkl) +# await task() -def load_task(task_pkl): +def load_task(task_pkl: Path | str) -> "Task[DefType]": """loading a task from a pickle file, settings proper input for the specific ind""" if isinstance(task_pkl, str): task_pkl = Path(task_pkl) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 3ee35c8aea..b72d1eed8f 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -876,6 +876,11 @@ def _resolve_value( class ShellDef(TaskDef[ShellOutputsType]): + arguments: ty.List[str] = shell.arg( + default=attrs.Factory(list), + help="Additional arguments to pass to the command.", + ) + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) def _run(self, task: "Task[ShellDef]") -> None: @@ -952,7 +957,9 @@ def _command_args( # Sort command and arguments by position cmd_args = position_sort(pos_args) # pos_args values are each a list of arguments, so concatenate lists after sorting - return sum(cmd_args, []) + command_args = sum(cmd_args, []) + command_args += self.arguments + return command_args def _command_shelltask_executable( self, field: shell.arg, value: ty.Any diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 17102153a4..50fd2a81be 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -9,7 +9,6 @@ from copy import copy from collections import defaultdict from .workers import Worker, WORKERS -from .core import is_workflow from .graph import DiGraph from .helpers import ( get_open_loop, @@ -153,10 +152,10 @@ def Split(defn: TaskDef) -> tuple: 
"Use the `split` method to split the task before combining." ) task = Task(task_def, submitter=self, name="task", environment=self.environment) - if task.is_async: - self.loop.run_until_complete(task.run_async(rerun=self.rerun)) + if task.is_async: # Only workflow tasks can be async + self.loop.run_until_complete(self.worker.run_async(task, rerun=self.rerun)) else: - task.run(rerun=self.rerun) + self.worker.run(rerun=self.rerun) PersistentCache().clean_up() result = task.result() if result is None: @@ -285,8 +284,8 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non ) raise RuntimeError(msg) for task in tasks: - if is_workflow(task): - await task.run_async(rerun=self.rerun) + if task.is_async: + await self.worker.run_async(task, rerun=self.rerun) else: task_futures.add(self.worker.run(task, rerun=self.rerun)) task_futures = await self.worker.fetch_finished(task_futures) diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 2c4dc533fc..e129fb0c70 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -3,11 +3,14 @@ import asyncio import sys import json +import abc import re +import inspect import typing as ty from tempfile import gettempdir from pathlib import Path from shutil import copyfile, which +import cloudpickle as cp import concurrent.futures as cf from .core import Task from .specs import TaskDef @@ -15,7 +18,6 @@ get_available_cpus, read_and_display_async, save, - load_and_run, load_task, ) @@ -24,27 +26,38 @@ logger = logging.getLogger("pydra.worker") +if ty.TYPE_CHECKING: + from .specs import Result + DefType = ty.TypeVar("DefType", bound="TaskDef") -class Worker: +class Worker(metaclass=abc.ABCMeta): """A base class for execution of tasks.""" plugin_name: str - is_async: bool = True def __init__(self, loop=None): """Initialize the worker.""" logger.debug(f"Initializing {self.__class__.__name__}") self.loop = loop - def run(self, task: "Task[DefType]", **kwargs): + @abc.abstractmethod + def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": """Return coroutine for task execution.""" - raise NotImplementedError + pass + + async def run_async(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + return await task.run_async(rerun=rerun) def close(self): """Close this worker.""" + @property + def is_async(self) -> bool: + """Return whether the worker is asynchronous.""" + return inspect.iscoroutinefunction(self.run) + async def fetch_finished(self, futures): """ Awaits asyncio's :class:`asyncio.Task` until one is finished. 
@@ -133,7 +146,6 @@ class DebugWorker(Worker): """A worker to execute linearly.""" plugin_name: str = "debug" - is_async: bool = False def __init__(self, **kwargs): """Initialize worker.""" @@ -141,26 +153,17 @@ def __init__(self, **kwargs): def run( self, - task: "Task[DefType] | tuple[Path, Task[DefType]]", + task: "Task[DefType]", rerun: bool = False, - ): + ) -> "Result": """Run a task.""" - if isinstance(task, Task): - return task.run(rerun=rerun) - else: # it could be tuple that includes pickle files with tasks and inputs - task_main_pkl, _ = task - return load_and_run(task_main_pkl, rerun=rerun) + return task.run(rerun=rerun) def close(self): """Return whether the task is finished.""" async def fetch_finished(self, futures): - for future in futures: - await future - return set() - - # async def fetch_finished(self, futures): - # return await asyncio.wait(futures) + raise NotImplementedError("DebugWorker does not support async execution") class ConcurrentFuturesWorker(Worker): @@ -168,7 +171,10 @@ class ConcurrentFuturesWorker(Worker): plugin_name = "cf" - def __init__(self, n_procs=None): + n_procs: int + loop: cf.ProcessPoolExecutor + + def __init__(self, n_procs: int | None = None): """Initialize Worker.""" super().__init__() self.n_procs = get_available_cpus() if n_procs is None else n_procs @@ -177,26 +183,23 @@ def __init__(self, n_procs=None): # self.loop = asyncio.get_event_loop() logger.debug("Initialize ConcurrentFuture") - def run( + async def run( self, task: "Task[DefType]", rerun: bool = False, - **kwargs, - ): + ) -> "Result": """Run a task.""" assert self.loop, "No event loop available to submit tasks" - return self.exec_as_coro(task, rerun=rerun) - - async def exec_as_coro(self, runnable: "Task[DefType]", rerun: bool = False): - """Run a task (coroutine wrapper).""" - if isinstance(runnable, Task): - res = await self.loop.run_in_executor(self.pool, runnable.run, rerun) - else: # it could be tuple that includes pickle files with tasks and inputs - task_main_pkl, task_orig = runnable - res = await self.loop.run_in_executor( - self.pool, load_and_run, task_main_pkl - ) - return res + task_pkl = cp.dumps(task) + return await self.loop.run_in_executor( + self.pool, self.unpickle_and_run, task_pkl, rerun + ) + + @classmethod + def unpickle_and_run(cls, task_pkl: Path, rerun: bool) -> "Result": + """Unpickle and run a task.""" + task: Task[DefType] = cp.loads(task_pkl) + return task.run(rerun=rerun) def close(self): """Finalize the internal pool of tasks.""" @@ -233,22 +236,6 @@ def __init__(self, loop=None, max_jobs=None, poll_delay=1, sbatch_args=None): self.sbatch_args = sbatch_args or "" self.error = {} - def run(self, task: "Task[DefType]", rerun: bool = False): - """Worker submission API.""" - script_dir, batch_script = self._prepare_runscripts(task, rerun=rerun) - if (script_dir / script_dir.parts[1]) == gettempdir(): - logger.warning("Temporary directories may not be shared across computers") - if isinstance(task, Task): - cache_dir = task.cache_dir - name = task.name - uid = task.uid - else: # runnable is a tuple (ind, pkl file, task) - cache_dir = task[-1].cache_dir - name = task[-1].name - uid = f"{task[-1].uid}_{task[0]}" - - return self._submit_job(batch_script, name=name, uid=uid, cache_dir=cache_dir) - def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): if isinstance(task, Task): cache_dir = task.cache_dir @@ -274,7 +261,7 @@ def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): batchscript = script_dir / 
f"batchscript_{uid}.sh" python_string = ( f"""'from pydra.engine.helpers import load_and_run; """ - f"""load_and_run(task_pkl="{task_pkl}", ind={ind}, rerun={rerun}) '""" + f"""load_and_run("{task_pkl}", rerun={rerun}) '""" ) bcmd = "\n".join( ( @@ -287,13 +274,16 @@ def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): fp.writelines(bcmd) return script_dir, batchscript - async def _submit_job(self, batchscript, name, uid, cache_dir): - """Coroutine that submits task runscript and polls job until completion or error.""" - script_dir = cache_dir / f"{self.__class__.__name__}_scripts" / uid + async def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + """Worker submission API.""" + script_dir, batch_script = self._prepare_runscripts(task, rerun=rerun) + if (script_dir / script_dir.parts[1]) == gettempdir(): + logger.warning("Temporary directories may not be shared across computers") + script_dir = task.cache_dir / f"{self.__class__.__name__}_scripts" / task.uid sargs = self.sbatch_args.split() jobname = re.search(r"(?<=-J )\S+|(?<=--job-name=)\S+", self.sbatch_args) if not jobname: - jobname = ".".join((name, uid)) + jobname = ".".join((task.name, task.uid)) sargs.append(f"--job-name={jobname}") output = re.search(r"(?<=-o )\S+|(?<=--output=)\S+", self.sbatch_args) if not output: @@ -305,7 +295,7 @@ async def _submit_job(self, batchscript, name, uid, cache_dir): sargs.append(f"--error={error_file}") else: error_file = None - sargs.append(str(batchscript)) + sargs.append(str(batch_script)) # TO CONSIDER: add random sleep to avoid overloading calls rc, stdout, stderr = await read_and_display_async( "sbatch", *sargs, hide_display=True @@ -332,12 +322,12 @@ async def _submit_job(self, batchscript, name, uid, cache_dir): and "--no-requeue" not in self.sbatch_args ): # loading info about task with a specific uid - info_file = cache_dir / f"{uid}_info.json" + info_file = task.cache_dir / f"{task.uid}_info.json" if info_file.exists(): checksum = json.loads(info_file.read_text())["checksum"] - if (cache_dir / f"{checksum}.lock").exists(): + if (task.cache_dir / f"{checksum}.lock").exists(): # for pyt3.8 we could you missing_ok=True - (cache_dir / f"{checksum}.lock").unlink() + (task.cache_dir / f"{checksum}.lock").unlink() cmd_re = ("scontrol", "requeue", jobid) await read_and_display_async(*cmd_re, hide_display=True) else: @@ -463,38 +453,6 @@ def __init__( self.default_qsub_args = default_qsub_args self.max_mem_free = max_mem_free - def run(self, task: "Task[DefType]", rerun: bool = False): # TODO: add env - """Worker submission API.""" - ( - script_dir, - batch_script, - task_pkl, - ind, - output_dir, - task_qsub_args, - ) = self._prepare_runscripts(task, rerun=rerun) - if (script_dir / script_dir.parts[1]) == gettempdir(): - logger.warning("Temporary directories may not be shared across computers") - if isinstance(task, Task): - cache_dir = task.cache_dir - name = task.name - uid = task.uid - else: # runnable is a tuple (ind, pkl file, task) - cache_dir = task[-1].cache_dir - name = task[-1].name - uid = f"{task[-1].uid}_{task[0]}" - - return self._submit_job( - batch_script, - name=name, - uid=uid, - cache_dir=cache_dir, - task_pkl=task_pkl, - ind=ind, - output_dir=output_dir, - task_qsub_args=task_qsub_args, - ) - def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): if isinstance(task, Task): cache_dir = task.cache_dir @@ -566,17 +524,19 @@ async def check_for_results_files(self, jobid, threads_requested): del 
self.result_files_by_jobid[jobid][task] self.threads_used -= threads_requested - async def _submit_jobs( - self, - batchscript, - name, - uid, - cache_dir, - output_dir, - task_qsub_args, - interpreter="/bin/sh", - ): - # Get the number of slots requested for this task + async def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": + """Worker submission API.""" + ( + script_dir, + batch_script, + task_pkl, + ind, + output_dir, + task_qsub_args, + ) = self._prepare_runscripts(task, rerun=rerun) + if (script_dir / script_dir.parts[1]) == gettempdir(): + logger.warning("Temporary directories may not be shared across computers") + interpreter = "/bin/sh" threads_requested = self.default_threads_per_task if "smp" in task_qsub_args: smp_index = task_qsub_args.split().index("smp") @@ -618,12 +578,11 @@ async def _submit_jobs( python_string = f"""import sys; from pydra.engine.helpers import load_and_run; \ task_pkls={[task_tuple for task_tuple in tasks_to_run]}; \ task_index=int(sys.argv[1])-1; \ - load_and_run(task_pkl=task_pkls[task_index][0], \ - ind=task_pkls[task_index][1], rerun=task_pkls[task_index][2])""" + load_and_run(task_pkls[task_index][0], rerun=task_pkls[task_index][1])""" bcmd_job = "\n".join( ( f"#!{interpreter}", - f"{sys.executable} {Path(batchscript).with_suffix('.py')}" + f"{sys.executable} {Path(batch_script).with_suffix('.py')}" + " $SGE_TASK_ID", ) ) @@ -632,13 +591,15 @@ async def _submit_jobs( # Better runtime when the python contents are written to file # rather than given by cmdline arg -c - with Path(batchscript).with_suffix(".py").open("wt") as fp: + with Path(batch_script).with_suffix(".py").open("wt") as fp: fp.write(bcmd_py) - with batchscript.open("wt") as fp: + with batch_script.open("wt") as fp: fp.writelines(bcmd_job) - script_dir = cache_dir / f"{self.__class__.__name__}_scripts" / uid + script_dir = ( + task.cache_dir / f"{self.__class__.__task.name__}_scripts" / task.uid + ) script_dir.mkdir(parents=True, exist_ok=True) sargs = ["-t"] sargs.append(f"1-{len(tasks_to_run)}") @@ -647,7 +608,7 @@ async def _submit_jobs( jobname = re.search(r"(?<=-N )\S+", task_qsub_args) if not jobname: - jobname = ".".join((name, uid)) + jobname = ".".join((task.name, task.uid)) sargs.append("-N") sargs.append(jobname) output = re.search(r"(?<=-o )\S+", self.qsub_args) @@ -665,7 +626,7 @@ async def _submit_jobs( sargs.append(error_file) else: error_file = None - sargs.append(str(batchscript)) + sargs.append(str(batch_script)) await asyncio.sleep(random.uniform(0, 5)) @@ -697,7 +658,12 @@ async def _submit_jobs( exit_status = await self._verify_exit_code(jobid) if exit_status == "ERRORED": jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) else: for task_pkl, ind, rerun in tasks_to_run: @@ -710,17 +676,27 @@ async def _submit_jobs( exit_status = await self._verify_exit_code(jobid) if exit_status == "ERRORED": jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) poll_counter = 0 poll_counter += 1 await asyncio.sleep(self.poll_delay) else: - done = await self._poll_job(jobid, cache_dir) + done = await self._poll_job(jobid, task.cache_dir) if done: if done == "ERRORED": # If the SGE job was evicted, rerun it jobid = await self._rerun_job_array( - cache_dir, uid, sargs, tasks_to_run, error_file, jobid + task.cache_dir, + 
task.uid, + sargs, + tasks_to_run, + error_file, + jobid, ) else: self.job_completed_by_jobid[jobid] = True @@ -891,28 +867,15 @@ def __init__(self, **kwargs): self.client_args = kwargs logger.debug("Initialize Dask") - def run( + async def run( self, task: "Task[DefType]", rerun: bool = False, - **kwargs, - ): - """Run a task.""" - return self.exec_dask(task, rerun=rerun) - - async def exec_dask(self, task: "Task[DefType]", rerun: bool = False): - """Run a task (coroutine wrapper).""" + ) -> "Result": from dask.distributed import Client async with Client(**self.client_args, asynchronous=True) as client: - if isinstance(task, Task): - future = client.submit(task) - result = await future - else: # it could be a path to a pickled task file - assert isinstance(task, Path) - future = client.submit(load_and_run, task) - result = await future - return result + return await client.submit(task.run, rerun) def close(self): """Finalize the internal pool of tasks.""" @@ -939,15 +902,6 @@ def __init__(self, **kwargs): logger.debug("Initialize PsijWorker") self.psij = psij - def run( - self, - task: "Task[DefType]", - rerun: bool = False, - **kwargs, - ): - """Run a task.""" - return self.exec_psij(task, rerun=rerun) - def make_spec(self, cmd=None, arg=None): """ Create a PSI/J job specification. @@ -964,13 +918,13 @@ def make_spec(self, cmd=None, arg=None): psij.JobDef PSI/J job specification. """ - definition = self.psij.JobDef() - definition.executable = cmd - definition.arguments = arg + spec = self.psij.JobSpec() + spec.executable = cmd + spec.arguments = arg - return definition + return spec - def make_job(self, definition, attributes): + def make_job(self, spec, attributes): """ Create a PSI/J job. @@ -987,14 +941,14 @@ def make_job(self, definition, attributes): PSI/J job. """ job = self.psij.Job() - job.definition = definition + job.spec = spec return job - async def exec_psij( + async def run( self, task: "Task[DefType]", rerun: bool = False, - ): + ) -> "Result": """ Run a task (coroutine wrapper). 
@@ -1013,50 +967,31 @@ async def exec_psij( jex = self.psij.JobExecutor.get_instance(self.subtype) absolute_path = Path(__file__).parent - if isinstance(task, Task): - cache_dir = task.cache_dir - file_path = cache_dir / "runnable_function.pkl" - with open(file_path, "wb") as file: - pickle.dump(task.run, file) - func_path = absolute_path / "run_pickled.py" - definition = self.make_spec("python", [func_path, file_path]) - else: # it could be tuple that includes pickle files with tasks and inputs - cache_dir = task[-1].cache_dir - file_path_1 = cache_dir / "taskmain.pkl" - file_path_2 = cache_dir / "ind.pkl" - ind, task_main_pkl, task_orig = task - with open(file_path_1, "wb") as file: - pickle.dump(task_main_pkl, file) - with open(file_path_2, "wb") as file: - pickle.dump(ind, file) - func_path = absolute_path / "run_pickled.py" - definition = self.make_spec( - "python", - [ - func_path, - file_path_1, - file_path_2, - ], - ) + cache_dir = task.cache_dir + file_path = cache_dir / "runnable_function.pkl" + with open(file_path, "wb") as file: + pickle.dump(task.run, file) + func_path = absolute_path / "run_pickled.py" + spec = self.make_spec("python", [func_path, file_path]) if rerun: - definition.arguments.append("--rerun") + spec.arguments.append("--rerun") - definition.stdout_path = cache_dir / "demo.stdout" - definition.stderr_path = cache_dir / "demo.stderr" + spec.stdout_path = cache_dir / "demo.stdout" + spec.stderr_path = cache_dir / "demo.stderr" - job = self.make_job(definition, None) + job = self.make_job(spec, None) jex.submit(job) job.wait() - if definition.stderr_path.stat().st_size > 0: - with open(definition.stderr_path, "r") as stderr_file: + if spec.stderr_path.stat().st_size > 0: + with open(spec.stderr_path, "r") as stderr_file: stderr_contents = stderr_file.read() raise Exception( - f"stderr_path '{definition.stderr_path}' is not empty. Contents:\n{stderr_contents}" + f"stderr_path '{spec.stderr_path}' is not empty. Contents:\n{stderr_contents}" ) - return + return task.result() def close(self): """Finalize the internal pool of tasks.""" From e59d86edc7150c595e888c4bd92f79225da816eb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 24 Jan 2025 20:03:28 +1100 Subject: [PATCH 146/342] debugging cf worker --- .../tutorial/2-advanced-execution.ipynb | 135 +++++++++++------- pydra/design/shell.py | 2 + pydra/engine/core.py | 4 +- pydra/engine/specs.py | 25 ++-- pydra/engine/submitter.py | 6 +- 5 files changed, 107 insertions(+), 65 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 068546252c..581ac71f41 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -32,22 +32,28 @@ "\n", "Pydra supports several workers with which to execute tasks\n", "\n", + "- `debug` (default)\n", "- `cf`\n", "- `slurm`\n", "- `sge`\n", + "- `psij`\n", "- `dask` (experimental)\n", - "- `debug`\n", "\n", - "By default, the *cf* (*ConcurrentFutures*) worker is used, which\n", - "executes tasks across multiple processes. If you are using a high-performance cluster (HPC)\n", - "then the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", - "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) workers can be used to submit each\n", - "workflow node as separate jobs to the HPC scheduler. 
There is also an\n", - "experimental [Dask](https://www.dask.org/) worker.\n", + "By default, the *debug* worker is used, which runs tasks serially in a single process\n", + "without use of the `asyncio` module. This makes it easier to debug errors in workflows\n", + "and python tasks, however, when using in Pydra in production you will typically want to\n", + "parallelise the execution for efficiency.\n", "\n", - "When using a debugger in the development of a workflow or Python tasks, the\n", - "*debug* worker is recommended as it executes nodes \"synchronously\" (as opposed to\n", - "asynchronously), and can therefore break on uncaught exceptions.\n", + "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", + "option because it is able to spread the tasks to be run over multiple processes and\n", + "maximise CPU usage.\n", + "\n", + "If you have access to a high-performance cluster (HPC) then\n", + "the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", + "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", + "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", + "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", + "range of execution backends to choose from.\n", "\n", "To specify a worker, the abbreviation can be passed either as a string or using the\n", "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", @@ -65,53 +71,73 @@ "name": "stderr", "output_type": "stream", "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev141+g03c7438b.d20250123\n" + "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev144+g6a590e9d.d20250124\n" ] }, { - "ename": "IndentationError", - "evalue": "unexpected indent (, line 1)", + "ename": "RuntimeError", + "evalue": "Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n", "output_type": "error", "traceback": [ - "Traceback \u001b[0;36m(most recent call last)\u001b[0m:\n", - "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/IPython/core/interactiveshell.py:3577\u001b[0m in \u001b[1;35mrun_code\u001b[0m\n exec(code_obj, self.user_global_ns, self.user_ns)\u001b[0m\n", - "\u001b[0m Cell \u001b[1;32mIn[2], line 10\u001b[0m\n outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:193\u001b[0m in \u001b[1;35m__call__\u001b[0m\n result = sub(self)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/submitter.py:157\u001b[0m 
in \u001b[1;35m__call__\u001b[0m\n self.loop.run_until_complete(task.run_async(rerun=self.rerun))\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m in \u001b[1;35mrun_until_complete\u001b[0m\n return f.result()\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m in \u001b[1;35mresult\u001b[0m\n raise self._exception.with_traceback(self._exception_tb)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m in \u001b[1;35m__step_run_and_handle_result\u001b[0m\n result = coro.send(None)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:398\u001b[0m in \u001b[1;35mrun_async\u001b[0m\n \"'%s' is attempting to acquire lock on %s\", self.name, self.lockfile\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:212\u001b[0m in \u001b[1;35mlockfile\u001b[0m\n return self.output_dir.with_suffix(\".lock\")\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:261\u001b[0m in \u001b[1;35moutput_dir\u001b[0m\n return self.cache_dir / self.checksum\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/core.py:206\u001b[0m in \u001b[1;35mchecksum\u001b[0m\n input_hash = self.definition._hash\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:361\u001b[0m in \u001b[1;35m_hash\u001b[0m\n hsh, self._hashes = self._compute_hashes()\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/engine/specs.py:384\u001b[0m in \u001b[1;35m_compute_hashes\u001b[0m\n k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items()\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:206\u001b[0m in \u001b[1;35mhash_function\u001b[0m\n return hash_object(obj, **kwargs).hex()\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:237\u001b[0m in \u001b[1;35mhash_object\u001b[0m\n raise e\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:225\u001b[0m in \u001b[1;35mhash_object\u001b[0m\n return hash_single(obj, cache)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:281\u001b[0m in \u001b[1;35mhash_single\u001b[0m\n first = next(bytes_it)\u001b[0m\n", - "\u001b[0m File \u001b[1;32m~/git/workflows/pydra/pydra/utils/hash.py:537\u001b[0m in \u001b[1;35mbytes_repr_function\u001b[0m\n src_ast = ast.parse(src)\u001b[0m\n", - "\u001b[0;36m File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/ast.py:52\u001b[0;36m in \u001b[0;35mparse\u001b[0;36m\n\u001b[0;31m return compile(source, filename, mode, flags,\u001b[0;36m\n", - "\u001b[0;36m File \u001b[0;32m:1\u001b[0;36m\u001b[0m\n\u001b[0;31m @workflow.define(outputs=output_types)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unexpected indent\nand therefore cannot hash `.Split at 0x11443cd60>` of type `builtins.function`. 
Consider implementing a specific `bytes_repr()`(see pydra.utils.hash.register_serializer) or a `__bytes_repr__()` dunder methods for this type\n" + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m ten_to_the_power \u001b[38;5;241m=\u001b[39m TenToThePower()\u001b[38;5;241m.\u001b[39msplit(p\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m])\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Run the 5 tasks in parallel split across 3 processes\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mten_to_the_power\u001b[49m\u001b[43m(\u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_procs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 14\u001b[0m p1, p2, p3, p4, p5 \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mout\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m10^5 = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp5\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:194\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, **kwargs)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter(\n\u001b[1;32m 184\u001b[0m audit_flags\u001b[38;5;241m=\u001b[39maudit_flags,\n\u001b[1;32m 185\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 193\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[0;32m--> 194\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__notes__\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m WORKER_KWARG_FAIL_NOTE \u001b[38;5;129;01min\u001b[39;00m e\u001b[38;5;241m.\u001b[39m__notes__:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:156\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, task_def)\u001b[0m\n\u001b[1;32m 154\u001b[0m task \u001b[38;5;241m=\u001b[39m Task(task_def, submitter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask\u001b[39m\u001b[38;5;124m\"\u001b[39m, environment\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment)\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async: 
\u001b[38;5;66;03m# Only workflow tasks can be async\u001b[39;00m\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_async\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m result \u001b[38;5;241m=\u001b[39m 
\u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 316\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:51\u001b[0m, in \u001b[0;36mWorker.run_async\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m, rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39mrun_async(rerun\u001b[38;5;241m=\u001b[39mrerun)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:416\u001b[0m, in \u001b[0;36mTask.run_async\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39m_run_async(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 417\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:709\u001b[0m, in \u001b[0;36mWorkflowDef._run_async\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 708\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow asynchronously.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 709\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39msubmitter\u001b[38;5;241m.\u001b[39mexpand_workflow_async(task)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:285\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow_async\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hashes_have_changed:\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 274\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSet loglevel to \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdebug\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m in order to track hash changes \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 275\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthroughout the execution of the workflow.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mor more types in your interface inputs.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 284\u001b[0m )\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(msg)\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async:\n", + "\u001b[0;31mRuntimeError\u001b[0m: Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n" ] } ], "source": [ + "\n", "from pydra.design import python\n", "\n", - "@python.define\n", - "def TenToThePower(p: int) -> int:\n", - " return 10 ** p\n", + "if __name__ == \"__main__\":\n", + "\n", + " @python.define\n", + " def TenToThePower(p: int) -> int:\n", + " return 10 ** p\n", + "\n", + " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", "\n", - "ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", + " # Run the 5 tasks in parallel split across 3 processes\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + "\n", + " p1, p2, p3, p4, p5 = outputs.out\n", + "\n", + " print(f\"10^5 = {p5}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.workers import ConcurrentFuturesWorker\n", + "\n", + "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", "\n", "# Run the 5 tasks in parallel split across 3 processes\n", - "outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", "\n", - "p1, p2, p3, p4, p5 = outputs.out\n", + "p6, p7, p8, p9, p10 = outputs.out\n", "\n", - "print(f\"10^5 = {p5}\")" + "print(f\"10^10 = {p10}\")" ] }, { @@ -141,10 +167,6 @@ "from fileformats.medimage import Nifti1\n", "from pydra.engine.submitter import Submitter\n", "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", - "import nest_asyncio\n", - "\n", - "# Allow running async code in Jupyter notebooks\n", - "nest_asyncio.apply()\n", "\n", "# Make directory filled with nifti files\n", "test_dir = Path(tempfile.mkdtemp())\n", @@ -153,19 +175,26 @@ "for i in 
range(10):\n", " Nifti1.sample(nifti_dir, seed=i)\n", "\n", - "VOXEL_SIZES = [0.5, 0.5, 0.5, 0.75, 0.75, 0.75, 1.0, 1.0, 1.0, 1.25]\n", - "\n", - "mrgrid_varying_vox_sizes = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", + " in_file=nifti_dir.iterdir()\n", ")\n", "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid()\n", + "\n", + "# Create a new custom directory\n", + "cache_dir = test_dir / \"cache\"\n", + "cache_dir.mkdir()\n", + "\n", + "submitter = Submitter(cache_dir=cache_dir)\n", "\n", "# Run the task to resample all NIfTI files with different voxel sizes\n", "with submitter:\n", - " result1 = submitter(mrgrid_varying_vox_sizes)" + " result1 = submitter(mrgrid)\n", + "\n", + "print(result1)\n" ] }, { diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 6c8889b990..3a51b9f5d3 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -662,6 +662,8 @@ def remaining_positions( # Check for multiple positions positions = defaultdict(list) for arg in args: + if arg.name == "arguments": + continue if arg.position is not None: if arg.position >= 0: positions[arg.position].append(arg) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index ab34be2026..292235acde 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -496,7 +496,9 @@ def result(self, return_inputs=False): the result of the task """ if self.errored: - return Result(outputs=None, runtime=None, errored=True, task=self) + return Result( + outputs=None, runtime=None, errored=True, output_dir=self.output_dir + ) checksum = self.checksum result = load_result(checksum, self.cache_locations) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index b72d1eed8f..07e7c1a6ff 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -134,7 +134,7 @@ class TaskDef(ty.Generic[OutputsType]): def __call__( self, cache_dir: os.PathLike | None = None, - worker: "str | ty.Type[Worker] | Worker" = "cf", + worker: "str | ty.Type[Worker] | Worker" = "debug", environment: "Environment | None" = None, rerun: bool = False, cache_locations: ty.Iterable[os.PathLike] | None = None, @@ -205,10 +205,14 @@ def __call__( ) raise if result.errored: - raise RuntimeError( - f"Task {self} failed @ {result.errors['time of crash']} with following errors:\n" - + "\n".join(result.errors["error message"]) - ) + if isinstance(self, WorkflowDef) or self._splitter: + raise RuntimeError(f"Workflow {self} failed with errors:") + else: + errors = result.errors + raise RuntimeError( + f"Task {self} failed @ {errors['time of crash']} with following errors:\n" + + "\n".join(errors["error message"]) + ) return result.outputs def split( @@ -550,8 +554,10 @@ def get_output_field(self, field_name): @property def errors(self): if self.errored: - with open(self.output_dir / "_error.pklz", "rb") as f: - return cp.load(f) + error_file = self.output_dir / "_error.pklz" + if error_file.exists(): + with open(error_file, "rb") as f: + return cp.load(f) return None @@ -878,7 +884,8 @@ class ShellDef(TaskDef[ShellOutputsType]): arguments: ty.List[str] = shell.arg( default=attrs.Factory(list), - help="Additional arguments to pass to the 
command.", + sep=" ", + help="Additional free-form arguments to append to the end of the command.", ) RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) @@ -930,6 +937,8 @@ def _command_args( continue if name == "executable": pos_args.append(self._command_shelltask_executable(field, value)) + elif name == "arguments": + continue elif name == "args": pos_val = self._command_shelltask_args(field, value) if pos_val: diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 50fd2a81be..b9af104888 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -69,7 +69,7 @@ class Submitter: def __init__( self, cache_dir: os.PathLike | None = None, - worker: ty.Union[str, ty.Type[Worker]] = "cf", + worker: ty.Union[str, ty.Type[Worker]] = "debug", environment: "Environment | None" = None, rerun: bool = False, cache_locations: list[os.PathLike] | None = None, @@ -140,7 +140,7 @@ def __call__( output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} @workflow.define(outputs=output_types) - def Split(defn: TaskDef) -> tuple: + def Split(defn: TaskDef): node = workflow.add(defn) return tuple(getattr(node, o) for o in output_types) @@ -155,7 +155,7 @@ def Split(defn: TaskDef) -> tuple: if task.is_async: # Only workflow tasks can be async self.loop.run_until_complete(self.worker.run_async(task, rerun=self.rerun)) else: - self.worker.run(rerun=self.rerun) + self.worker.run(task, rerun=self.rerun) PersistentCache().clean_up() result = task.result() if result is None: From 834024e6ef29ce9b985af734c026e1c3cf75c320 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 25 Jan 2025 11:27:38 +1100 Subject: [PATCH 147/342] debugging cf execution within notebooks, implemented byte-code hashing of functions --- .../source/tutorial/1-getting-started.ipynb | 44 +++++ .../tutorial/2-advanced-execution.ipynb | 62 ++++--- new-docs/source/tutorial/tst.py | 2 +- pydra/engine/core.py | 11 +- pydra/engine/state.py | 5 + pydra/engine/submitter.py | 162 +++++++++++++----- pydra/utils/hash.py | 63 ++++--- 7 files changed, 251 insertions(+), 98 deletions(-) diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index 0cb4402c88..469cd8d50a 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -293,6 +293,50 @@ "print(\"\\n\".join(str(p) for p in outputs.out_file))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Executing tasks in parallel\n", + "\n", + "By default, Pydra will use the *debug* worker, which executes each task sequentially.\n", + "This makes it easier to debug tasks and workflows, however, in most cases, once a workflow\n", + "is ready to go, a concurrent worker is preferable so tasks can be executed in parallel\n", + "(see [Workers](./2-advanced-execution.html#Workers)). To use multiple processes on a\n", + "workstation, select the `cf` worker option when executing the task/workflow.\n", + "\n", + "Note that when multiprocessing in Python on Windows and macOS (and good practice on Linux/POSIX\n", + "OSs for compatibility), you need to place a `if __name__ == \"__main__\"` block when\n", + "executing in top-level scripts to allow the script to be imported, but not executed,\n", + "by subprocesses." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'nifti_dir' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;66;03m# <-- Add this block to allow the script to imported by subprocesses\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m MrGrid(operation\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregrid\u001b[39m\u001b[38;5;124m\"\u001b[39m, voxel\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\u001b[38;5;241m.\u001b[39msplit(in_file\u001b[38;5;241m=\u001b[39m\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 5\u001b[0m outputs \u001b[38;5;241m=\u001b[39m mrgrid(worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcf\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# <-- Select the \"cf\" worker here\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(p) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m outputs\u001b[38;5;241m.\u001b[39mout_file))\n", + "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" + ] + } + ], + "source": [ + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "if __name__ == \"__main__\": # <-- Add this block to allow the script to imported by subprocesses\n", + " mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", + " outputs = mrgrid(worker=\"cf\") # <-- Select the \"cf\" worker here\n", + " print(\"\\n\".join(str(p) for p in outputs.out_file))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 581ac71f41..7d54fd55b7 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -64,38 +64,21 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev144+g6a590e9d.d20250124\n" - ] - }, - { - "ename": "RuntimeError", - "evalue": "Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 12\u001b[0m\n\u001b[1;32m 9\u001b[0m ten_to_the_power \u001b[38;5;241m=\u001b[39m TenToThePower()\u001b[38;5;241m.\u001b[39msplit(p\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m])\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Run the 5 tasks in parallel split across 3 processes\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mten_to_the_power\u001b[49m\u001b[43m(\u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_procs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 14\u001b[0m p1, p2, p3, p4, p5 \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mout\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m10^5 = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp5\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:194\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, **kwargs)\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter(\n\u001b[1;32m 184\u001b[0m audit_flags\u001b[38;5;241m=\u001b[39maudit_flags,\n\u001b[1;32m 185\u001b[0m cache_dir\u001b[38;5;241m=\u001b[39mcache_dir,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 192\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 193\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[0;32m--> 194\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m 
\u001b[38;5;28mhasattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__notes__\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m WORKER_KWARG_FAIL_NOTE \u001b[38;5;129;01min\u001b[39;00m e\u001b[38;5;241m.\u001b[39m__notes__:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:156\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, task_def)\u001b[0m\n\u001b[1;32m 154\u001b[0m task \u001b[38;5;241m=\u001b[39m Task(task_def, submitter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask\u001b[39m\u001b[38;5;124m\"\u001b[39m, environment\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39menvironment)\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async: \u001b[38;5;66;03m# Only workflow tasks can be async\u001b[39;00m\n\u001b[0;32m--> 156\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_until_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_async\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/envs/wf12/lib/python3.12/site-packages/nest_asyncio.py:98\u001b[0m, in \u001b[0;36m_patch_loop..run_until_complete\u001b[0;34m(self, future)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m f\u001b[38;5;241m.\u001b[39mdone():\n\u001b[1;32m 96\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mEvent loop stopped before Future completed.\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 98\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/futures.py:203\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__log_traceback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 203\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\u001b[38;5;241m.\u001b[39mwith_traceback(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception_tb)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_result\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/asyncio/tasks.py:314\u001b[0m, in \u001b[0;36mTask.__step_run_and_handle_result\u001b[0;34m(***failed resolving arguments***)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;66;03m# We use the `send` method directly, because coroutines\u001b[39;00m\n\u001b[1;32m 313\u001b[0m \u001b[38;5;66;03m# don't have `__iter__` and `__next__` methods.\u001b[39;00m\n\u001b[0;32m--> 314\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mcoro\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 316\u001b[0m result \u001b[38;5;241m=\u001b[39m coro\u001b[38;5;241m.\u001b[39mthrow(exc)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:51\u001b[0m, in \u001b[0;36mWorker.run_async\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m, rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39mrun_async(rerun\u001b[38;5;241m=\u001b[39mrerun)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:416\u001b[0m, in \u001b[0;36mTask.run_async\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39m_run_async(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 417\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:709\u001b[0m, in \u001b[0;36mWorkflowDef._run_async\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 707\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_async\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 708\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow asynchronously.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 709\u001b[0m 
\u001b[38;5;28;01mawait\u001b[39;00m task\u001b[38;5;241m.\u001b[39msubmitter\u001b[38;5;241m.\u001b[39mexpand_workflow_async(task)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:285\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow_async\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m hashes_have_changed:\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 274\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSet loglevel to \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdebug\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m in order to track hash changes \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 275\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthroughout the execution of the workflow.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mor more types in your interface inputs.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 284\u001b[0m )\n\u001b[0;32m--> 285\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(msg)\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m task\u001b[38;5;241m.\u001b[39mis_async:\n", - "\u001b[0;31mRuntimeError\u001b[0m: Graph of 'Workflow(name='Split', inputs=Split(_constructed=None, defn=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), constructor=.Split at 0x114510d60>), outputs=SplitOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=...), _state=, _cont_dim=None, _inner_cont_dim={}))), _nodes={'TenToThePower': Node(name='TenToThePower', _definition=TenToThePower(p=StateArray(1, 2, 3, 4, 5), function=), _workflow=..., _lzout=TenToThePowerOutputs(out=LazyOutField(field='out', type=list[int], cast_from=None, type_checked=True, node=...)), _state=, _cont_dim=None, _inner_cont_dim={})})' workflow is not empty, but not able to get more tasks - something has gone wrong when retrieving the results predecessors:\n\n" + "[TenToThePower(p=1, function=), TenToThePower(p=2, function=), TenToThePower(p=3, function=), TenToThePower(p=4, function=), TenToThePower(p=5, function=)]\n", + "8ebcb54492b1642d6ea257afdb33786d\n", + "8ebcb54492b1642d6ea257afdb33786d\n" ] } ], "source": [ + "\n", "\n", "from pydra.design import python\n", "\n", @@ -107,12 +90,37 @@ "\n", " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", "\n", - " # Run the 5 tasks in parallel split across 3 processes\n", - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + " from pydra.design import workflow\n", + " from pydra.engine.core import Workflow\n", + " from pydra.engine.specs import TaskDef\n", + " from pydra.engine.helpers import list_fields\n", + "\n", + " output_types = {o.name: list[o.type] for o in list_fields(ten_to_the_power.Outputs)}\n", + " \n", + " @workflow.define(outputs=output_types)\n", + " def Split(defn: TaskDef):\n", + " node = workflow.add(defn)\n", + " return tuple(getattr(node, o) for o in output_types)\n", + "\n", + " split = Split(defn=ten_to_the_power)\n", + "\n", + " wf = 
Workflow.construct(split)\n", + " splits = list(wf[\"TenToThePower\"]._split_definition().values())\n", + "\n", + " print(splits)\n", + "\n", + " print(splits[0]._hash)\n", + " print(splits[0]._hash)\n", + " \n", + "\n", + " \n", + "\n", + " # # Run the 5 tasks in parallel split across 3 processes\n", + " # outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", "\n", - " p1, p2, p3, p4, p5 = outputs.out\n", + " # p1, p2, p3, p4, p5 = outputs.out\n", "\n", - " print(f\"10^5 = {p5}\")" + " # print(f\"10^5 = {p5}\")" ] }, { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 60000fd956..2a91a6edbf 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -9,7 +9,7 @@ def TenToThePower(p: int) -> int: ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) # Run the 5 tasks in parallel split across 3 processes - outputs = ten_to_the_power(worker="cf", n_procs=3) + outputs = ten_to_the_power(worker="cf", n_procs=3, clean_stale_locks=True) p1, p2, p3, p4, p5 = outputs.out diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 292235acde..7213881d50 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -3,6 +3,7 @@ import json import logging import os +import inspect import sys from pathlib import Path import typing as ty @@ -23,6 +24,7 @@ from pydra.utils.hash import hash_function from pydra.utils.typing import TypeParser, StateArray from .node import Node +from datetime import datetime from fileformats.generic import FileSet from .specs import ( RuntimeSpec, @@ -458,6 +460,13 @@ def done(self): return True return False + @property + def run_start_time(self) -> datetime | None: + """Check whether the task is currently running.""" + if not self.lockfile.exists(): + return None + return datetime.fromtimestamp(self.lockfile.stat().st_ctime) + def _combined_output(self, return_inputs=False): combined_results = [] for gr, ind_l in self.state.final_combined_ind_mapping.items(): @@ -523,7 +532,7 @@ def _check_for_hash_changes(self): field = getattr(attr.fields(type(self.definition)), changed) val = getattr(self.definition, changed) field_type = type(val) - if issubclass(field.type, FileSet): + if inspect.isclass(field.type) and issubclass(field.type, FileSet): details += ( f"- {changed}: value passed to the {field.type} field is of type " f"{field_type} ('{val}'). 
If it is intended to contain output data " diff --git a/pydra/engine/state.py b/pydra/engine/state.py index c97d71a53d..ef65487ca9 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -39,6 +39,11 @@ def __init__(self, indices: dict[str, int] | None = None): else: self.indices = OrderedDict(sorted(indices.items())) + def __repr__(self): + return ( + "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")" + ) + def __hash__(self): return hash(tuple(self.indices.items())) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index b9af104888..40fef6d8f0 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -7,6 +7,7 @@ from pathlib import Path from tempfile import mkdtemp from copy import copy +from datetime import datetime from collections import defaultdict from .workers import Worker, WORKERS from .graph import DiGraph @@ -21,7 +22,7 @@ from .core import Task from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils import user_cache_dir - +from pydra.design import workflow import logging logger = logging.getLogger("pydra.submitter") @@ -62,20 +63,37 @@ class Submitter: Messengers, by default None messenger_args : dict, optional Messenger arguments, by default None + clean_stale_locks : bool, optional + Whether to clean stale lock files, i.e. lock files that were created before the + start of the current run. Don't set if using a global cache where there are + potentially multiple workflows that are running concurrently. By default (None), + lock files will be cleaned if the *debug* worker is used **kwargs : dict Keyword arguments to pass on to the worker initialisation """ + cache_dir: os.PathLike + worker: Worker + environment: "Environment | None" + rerun: bool + cache_locations: list[os.PathLike] + audit_flags: AuditFlag + messengers: ty.Iterable[Messenger] + messenger_args: dict[str, ty.Any] + clean_stale_locks: bool + run_start_time: datetime | None + def __init__( self, cache_dir: os.PathLike | None = None, - worker: ty.Union[str, ty.Type[Worker]] = "debug", + worker: str | ty.Type[Worker] | Worker = "debug", environment: "Environment | None" = None, rerun: bool = False, cache_locations: list[os.PathLike] | None = None, audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, + clean_stale_locks: bool | None = None, **kwargs, ): @@ -113,6 +131,12 @@ def __init__( except TypeError as e: e.add_note(WORKER_KWARG_FAIL_NOTE) raise + self.run_start_time = None + self.clean_stale_locks = ( + clean_stale_locks + if clean_stale_locks is not None + else (self.worker_name == "debug") + ) self.worker_kwargs = kwargs self._worker.loop = self.loop @@ -133,18 +157,16 @@ def __call__( task_def._check_rules() # If the outer task is split, create an implicit workflow to hold the split nodes if task_def._splitter: - - from pydra.design import workflow from pydra.engine.specs import TaskDef output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} @workflow.define(outputs=output_types) - def Split(defn: TaskDef): + def Split(defn: TaskDef, output_types: dict): node = workflow.add(defn) return tuple(getattr(node, o) for o in output_types) - task_def = Split(defn=task_def) + task_def = Split(defn=task_def, output_types=output_types) elif task_def._combiner: raise ValueError( @@ -152,17 +174,23 @@ def Split(defn: TaskDef): "Use the `split` method to split the task before combining." 
             )
         task = Task(task_def, submitter=self, name="task", environment=self.environment)
-        if task.is_async:  # Only workflow tasks can be async
-            self.loop.run_until_complete(self.worker.run_async(task, rerun=self.rerun))
-        else:
-            self.worker.run(task, rerun=self.rerun)
+        try:
+            self.run_start_time = datetime.now()
+            if task.is_async:  # Only workflow tasks can be async
+                self.loop.run_until_complete(
+                    self.worker.run_async(task, rerun=self.rerun)
+                )
+            else:
+                self.worker.run(task, rerun=self.rerun)
+        finally:
+            self.run_start_time = None
         PersistentCache().clean_up()
         result = task.result()
         if result is None:
             if task.lockfile.exists():
                 raise RuntimeError(
                     f"Task {task} has a lockfile, but no result was found. "
-                    "This may be due to another submission process running, or the hard "
+                    "This may be due to another queued submission process, or the hard "
                     "interrupt (e.g. a debugging abortion) interrupting a previous run. "
                     f"In the case of an interrupted run, please remove {str(task.lockfile)!r} "
                     "and resubmit."
@@ -228,18 +256,30 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non
         # this might be related to some delays saving the files
         # so try to get_runnable_tasks for another minute
         ii = 0
-        while not tasks and exec_graph.nodes:
+        while not tasks and any(not n.done for n in exec_graph.nodes):
             tasks = self.get_runnable_tasks(exec_graph)
             ii += 1
             # don't block the event loop!
             await asyncio.sleep(1)
-            if ii > 60:
+            if ii > 10:
+                not_done = "\n".join(
+                    (
+                        f"{n.name}: started={bool(n.started)}, "
+                        f"blocked={list(n.blocked)}, queued={list(n.queued)}"
+                    )
+                    for n in exec_graph.nodes
+                    if not n.done
+                )
                 msg = (
-                    f"Graph of '{wf}' workflow is not empty, but not able to get "
-                    "more tasks - something has gone wrong when retrieving the "
-                    "results predecessors:\n\n"
+                    "Something has gone wrong when retrieving the predecessor "
+                    f"results. Not able to get any more tasks but the following "
+                    f"nodes of the {wf.name!r} workflow are not done:\n{not_done}\n\n"
                 )
-                # Get blocked tasks and the predecessors they are waiting on
+                not_done = [n for n in exec_graph.nodes if not n.done]
+                msg += "\n" + ", ".join(
+                    f"{t.name}: {t.done}" for t in not_done[0].queued.values()
+                )
+                # Get blocked tasks and the predecessors they are blocked on
                 outstanding: dict[Task[DefType], list[Task[DefType]]] = {
                     t: [
                         p for p in exec_graph.predecessors[t.name] if not p.done
@@ -248,11 +288,11 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non
                 }

             hashes_have_changed = False
-            for task, waiting_on in outstanding.items():
-                if not waiting_on:
+            for task, blocked_on in outstanding.items():
+                if not blocked_on:
                     continue
                 msg += f"- '{task.name}' node blocked due to\n"
-                for pred in waiting_on:
+                for pred in blocked_on:
                     if (
                         pred.checksum
                         != wf.inputs._graph_checksums[pred.name]
@@ -302,13 +342,21 @@ def close(self):
         """
         Close submitter.

-        Do not close previously running loop.
+        Do not close a previously running loop.
         """
         self.worker.close()

         if self._own_loop:
             self.loop.close()

+    def _check_locks(self, tasks: list[Task]) -> None:
+        """Check for stale lock files and remove them."""
+        if self.clean_stale_locks:
+            for task in tasks:
+                start_time = task.run_start_time
+                if start_time and start_time < self.run_start_time:
+                    task.lockfile.unlink()
+
     def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]:
         """Parse a graph and return all runnable tasks.
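For reference, the stale-lock handling added above is opted into at call time; a minimal usage sketch, mirroring the `tst.py` change earlier in this series (the `cf` worker and `n_procs=3` are illustrative values, not requirements):

    from pydra.design import python

    @python.define
    def TenToThePower(p: int) -> int:
        return 10 ** p

    if __name__ == "__main__":
        ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])
        # clean_stale_locks removes lock files created before this submission
        # started, so an interrupted earlier run does not block the new one
        outputs = ten_to_the_power(worker="cf", n_procs=3, clean_stale_locks=True)
        print(outputs.out)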
@@ -338,6 +386,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: if not node.started: not_started.add(node) tasks.extend(node.get_runnable_tasks(graph)) + self._check_locks(tasks) return tasks @property @@ -369,10 +418,12 @@ class NodeExecution(ty.Generic[DefType]): errored: dict[StateIndex | None, "Task[DefType]"] # List of tasks that couldn't be run due to upstream errors unrunnable: dict[StateIndex | None, list["Task[DefType]"]] - # List of tasks that are running - running: dict[StateIndex | None, "Task[DefType]"] - # List of tasks that are waiting on other tasks to complete before they can be run - waiting: dict[StateIndex | None, "Task[DefType]"] + # List of tasks that are queued + queued: dict[StateIndex | None, "Task[DefType]"] + # List of tasks that are queued + running: dict[StateIndex | None, tuple["Task[DefType]", datetime]] + # List of tasks that are blocked on other tasks to complete before they can be run + blocked: dict[StateIndex | None, "Task[DefType]"] _tasks: dict[StateIndex | None, "Task[DefType]"] | None @@ -391,10 +442,11 @@ def __init__( self.submitter = submitter # Initialize the state dictionaries self._tasks = None - self.waiting = {} + self.blocked = {} self.successful = {} self.errored = {} - self.running = {} + self.queued = {} + self.running = {} # Not used in logic, but may be useful for progress tracking self.unrunnable = defaultdict(list) self.state_names = self.node.state.names self.workflow_inputs = workflow_inputs @@ -430,18 +482,44 @@ def started(self) -> bool: self.successful or self.errored or self.unrunnable - or self.running - or self.waiting + or self.queued + or self.blocked ) @property def done(self) -> bool: - return self.started and not (self.running or self.waiting) + self.update_status() + if not self.started: + return False + # Check to see if any previously queued tasks have completed + return not (self.queued or self.blocked or self.running) + + def update_status(self) -> None: + """Updates the status of the tasks in the node.""" + if not self.started: + return + # Check to see if any previously queued tasks have completed + for index, task in list(self.queued.items()): + if task.done: + self.successful[task.state_index] = self.queued.pop(index) + elif task.errored: + self.errored[task.state_index] = self.queued.pop(index) + elif task.run_start_time: + self.running[task.state_index] = ( + self.queued.pop(index), + task.run_start_time, + ) + # Check to see if any previously running tasks have completed + for index, (task, start_time) in list(self.running.items()): + if task.done: + self.successful[task.state_index] = self.running.pop(index)[0] + elif task.errored: + self.errored[task.state_index] = self.running.pop(index)[0] @property def all_failed(self) -> bool: return (self.unrunnable or self.errored) and not ( - self.successful or self.waiting or self.running + self.successful or self.blocked or self.queued ) def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: @@ -470,7 +548,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready - to run, can't be run due to upstream errors, or are waiting on other tasks to complete. + to run, can't be run due to upstream errors, or are blocked on other tasks to complete. 
Parameters ---------- @@ -488,29 +566,23 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: runnable: list["Task[DefType]"] = [] self.tasks # Ensure tasks are loaded if not self.started: - self.waiting = copy(self._tasks) - # Check to see if any previously running tasks have completed - for index, task in list(self.running.items()): - if task.done: - self.successful[task.state_index] = self.running.pop(index) - elif task.errored: - self.errored[task.state_index] = self.running.pop(index) - # Check to see if any waiting tasks are now runnable/unrunnable - for index, task in list(self.waiting.items()): + self.blocked = copy(self._tasks) + # Check to see if any blocked tasks are now runnable/unrunnable + for index, task in list(self.blocked.items()): pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: if index not in pred.successful: is_runnable = False if index in pred.errored: - self.unrunnable[index].append(self.waiting.pop(index)) + self.unrunnable[index].append(self.blocked.pop(index)) if index in pred.unrunnable: self.unrunnable[index].extend(pred.unrunnable[index]) - self.waiting.pop(index) + self.blocked.pop(index) break if is_runnable: - runnable.append(self.waiting.pop(index)) - self.running.update({t.state_index: t for t in runnable}) + runnable.append(self.blocked.pop(index)) + self.queued.update({t.state_index: t for t in runnable}) return runnable diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 224af25fba..a836eaddf3 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -4,12 +4,9 @@ import os import struct import inspect -import re from datetime import datetime import typing as ty import types -import ast -import cloudpickle as cp from pathlib import Path from collections.abc import Mapping from functools import singledispatch @@ -331,7 +328,17 @@ def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: elif hasattr(obj, "__slots__"): dct = {attr: getattr(obj, attr) for attr in obj.__slots__} else: - dct = obj.__dict__ + try: + dct = obj.__dict__ + except AttributeError: + dct = { + n: getattr(obj, n) + for n in dir(obj) + if not ( + (n.startswith("__") and n.endswith("__")) + or inspect.ismethod(getattr(obj, n)) + ) + } yield from bytes_repr_mapping_contents(dct, cache) yield b"}" @@ -525,31 +532,39 @@ def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: + yield b"code:(" + yield from bytes_repr_sequence_contents( + ( + obj.co_argcount, + obj.co_posonlyargcount, + obj.co_kwonlyargcount, + obj.co_nlocals, + obj.co_stacksize, + obj.co_flags, + obj.co_code, + obj.co_consts, + obj.co_names, + obj.co_varnames, + obj.co_filename, + obj.co_freevars, + obj.co_name, + obj.co_firstlineno, + obj.co_lnotab, + obj.co_cellvars, + ), + cache, + ) + yield b")" + + @register_serializer def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: """Serialize a function, attempting to use the AST of the source code if available otherwise falling back to using cloudpickle to serialize the byte-code of the function.""" - try: - src = inspect.getsource(obj) - except OSError: - # Fallback to using cloudpickle to serialize the function if the source - # code is not available - bytes_repr = cp.dumps(obj) - else: - indent = re.match(r"(\s*)", src).group(1) - if indent: - src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) - src_ast = ast.parse(src) - # Remove the function 
definition from the source code - bytes_repr = ast.dump( - src_ast, annotate_fields=False, include_attributes=False - ).encode() - - yield b"function:(" - for i in range(0, len(bytes_repr), FUNCTION_SRC_CHUNK_LEN_DEFAULT): - yield hash_single(bytes_repr[i : i + FUNCTION_SRC_CHUNK_LEN_DEFAULT], cache) - yield b")" + yield from bytes_repr(obj.__code__, cache) def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]: From 142a58d76c08271ac4dc7345ec1af216b1067bf7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 27 Jan 2025 14:47:30 +1100 Subject: [PATCH 148/342] added troubleshooting tutorial stub --- .../source/tutorial/1-getting-started.ipynb | 186 ++++++++++-------- .../tutorial/2-advanced-execution.ipynb | 130 +++++++----- .../source/tutorial/3-troubleshooting.ipynb | 0 .../{3-python.ipynb => 4-python.ipynb} | 0 .../tutorial/{4-shell.ipynb => 5-shell.ipynb} | 0 .../{5-workflow.ipynb => 7-workflow.ipynb} | 0 6 files changed, 181 insertions(+), 135 deletions(-) create mode 100644 new-docs/source/tutorial/3-troubleshooting.ipynb rename new-docs/source/tutorial/{3-python.ipynb => 4-python.ipynb} (100%) rename new-docs/source/tutorial/{4-shell.ipynb => 5-shell.ipynb} (100%) rename new-docs/source/tutorial/{5-workflow.ipynb => 7-workflow.ipynb} (100%) diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index 469cd8d50a..e2b15bcd71 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -13,14 +13,13 @@ "errors before they are executed, and modular execution workers and environments to specified\n", "independently of the task being performed.\n", "\n", - "Tasks can encapsulate Python functions, shell-commands or workflows constructed from\n", - "task components.\n", + "Tasks can encapsulate Python functions or shell-commands, or be multi-component workflows,\n", + "themselves constructed from task components including nested workflows.\n", "\n", "## Preparation\n", "\n", - "Before we get started, lets set up some test data to play with.\n", - "\n", - "Here we create a sample JSON file in a temporary directory" + "Before we get started, lets set up some test data to play with. Here we create a sample\n", + "JSON file in a temporary directory" ] }, { @@ -45,7 +44,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Next we create a directory containing ten randomly generated [NIfTI](https://nifti.nimh.nih.gov/) files" + "Next we create a directory containing 10 randomly generated [NIfTI](https://nifti.nimh.nih.gov/) files" ] }, { @@ -116,7 +115,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev141+g03c7438b.d20250123\n" + "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev144+g6a590e9d.d20250124\n" ] } ], @@ -134,48 +133,6 @@ "assert outputs.out == JSON_CONTENTS" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If you want to access a richer `Result` object you can use a Submitter object to execute the task" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result(task=, outputs=LoadJsonOutputs(out={'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}), runtime=None, errored=False)\n" - ] - } - ], - "source": [ - "from pydra.engine.submitter import Submitter\n", - "\n", - "with Submitter() as submitter:\n", - " result = submitter(load_json)\n", - "\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `Result` object contains\n", - "\n", - "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", - "* `runtime`: information about the peak memory and CPU usage\n", - "* `errored`: the error status of the task\n", - "* `task`: the task object that generated the results\n", - "* `output_dir`: the output directory the results are stored in" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -204,16 +161,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n" + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + 
"/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" ] } ], @@ -255,16 +212,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/Task_c989fc46725c0d124dc287f463674e63/out_file.mif\n" + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" ] } ], @@ -301,7 +258,7 @@ "\n", "By default, Pydra will use the *debug* worker, which executes each task sequentially.\n", "This makes it easier to debug tasks and workflows, however, in most cases, once a workflow\n", - "is ready to go, a concurrent worker is preferable so tasks can be executed in parallel\n", + "is tested, a concurrent worker is preferable so tasks can be executed in parallel\n", "(see [Workers](./2-advanced-execution.html#Workers)). 
To use multiple processes on a\n", "workstation, select the `cf` worker option when executing the task/workflow.\n", "\n", @@ -313,18 +270,23 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'nifti_dir' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmrtrix3\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mv3_0\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MrGrid\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;18m__name__\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__main__\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;66;03m# <-- Add this block to allow the script to imported by subprocesses\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m mrgrid \u001b[38;5;241m=\u001b[39m MrGrid(operation\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mregrid\u001b[39m\u001b[38;5;124m\"\u001b[39m, voxel\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m,\u001b[38;5;241m0.5\u001b[39m))\u001b[38;5;241m.\u001b[39msplit(in_file\u001b[38;5;241m=\u001b[39m\u001b[43mnifti_dir\u001b[49m\u001b[38;5;241m.\u001b[39miterdir())\n\u001b[1;32m 5\u001b[0m outputs \u001b[38;5;241m=\u001b[39m mrgrid(worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcf\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;66;03m# <-- Select the \"cf\" worker here\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mstr\u001b[39m(p) \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m outputs\u001b[38;5;241m.\u001b[39mout_file))\n", - "\u001b[0;31mNameError\u001b[0m: name 'nifti_dir' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", + 
"/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" ] } ], @@ -341,9 +303,71 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Debugging failed tasks\n", + "## File-system locations\n", + "\n", + "Output and intermediate files are typically generated during the course of a workflow/task run.\n", + "In addition to this, Pydra generates a cache directory for each task, to store the\n", + "the task definition and results in pickle files for future reference\n", + "(see [Troubleshooting](./troubleshooting.html)). By default, these task cache directories\n", + "are stored in a platform-specific application cache directory\n", + "\n", + "* Windows: ??\n", + "* Linux: ??\n", + "* macOS: `/Users//Library/Caches/pydra//run-cache`\n", + "\n", + "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", + "task and the operation to be performed. This hash is used to name the task cache directory\n", + "within the specified cache root directory. Therefore, if you use the same cache\n", + "root and in a subsequent run the same task is executed with the same\n", + "inputs, then the location of its output directory will be the same, and the outputs\n", + "generated by the previous run will be reused.\n", + "\n", + "This cache will grow as more runs are called, therefore care needs to be taken to ensure\n", + "there is enough space on the target disk. Since the cache will be constantly To specify\n", + "a different location for this cache, simply provide the `cache_root` keyword argument to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'mrgrid' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmrgrid\u001b[49m(cache_root\u001b[38;5;241m=\u001b[39mPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m~/pydra-cache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mexpanduser())\n", + "\u001b[0;31mNameError\u001b[0m: name 'mrgrid' is not defined" + ] + } + ], + "source": [ + "outputs = mrgrid(cache_root=Path(\"~/pydra-cache\").expanduser())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To check alternative cache roots, while storing any generated task cache dirs in the \n", + "specified cache root, the `secondary_caches` keyword argument can be used" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.helpers import default_cache_root\n", "\n", - "Work in progress..." 
+ "outputs = mrgrid(\n", + " cache_root=Path(\"~/pydra-cache\").expanduser(),\n", + " secondary_caches=[default_cache_root]\n", + ")" ] }, { diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 7d54fd55b7..510dfdf6ad 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -24,6 +24,57 @@ "nest_asyncio.apply()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submitter\n", + "\n", + "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "@python.define\n", + "def TenToThePower(p: int) -> int:\n", + " return 10 ** p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "\n", + "ten_to_the_power = TenToThePower(p=3)\n", + "\n", + "with Submitter() as submitter:\n", + " result = submitter(ten_to_the_power)\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -64,63 +115,30 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[TenToThePower(p=1, function=), TenToThePower(p=2, function=), TenToThePower(p=3, function=), TenToThePower(p=4, function=), TenToThePower(p=5, function=)]\n", - "8ebcb54492b1642d6ea257afdb33786d\n", - "8ebcb54492b1642d6ea257afdb33786d\n" + "10^5 = 100000\n" ] } ], "source": [ - "\n", - "\n", "from pydra.design import python\n", "\n", "if __name__ == \"__main__\":\n", "\n", - " @python.define\n", - " def TenToThePower(p: int) -> int:\n", - " return 10 ** p\n", - "\n", " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", "\n", - " from pydra.design import workflow\n", - " from pydra.engine.core import Workflow\n", - " from pydra.engine.specs import TaskDef\n", - " from pydra.engine.helpers import list_fields\n", - "\n", - " output_types = {o.name: list[o.type] for o in list_fields(ten_to_the_power.Outputs)}\n", - " \n", - " @workflow.define(outputs=output_types)\n", - " def Split(defn: TaskDef):\n", - " node = workflow.add(defn)\n", - " return tuple(getattr(node, o) for o in output_types)\n", - "\n", - " split = Split(defn=ten_to_the_power)\n", - "\n", - " wf = Workflow.construct(split)\n", - " splits = list(wf[\"TenToThePower\"]._split_definition().values())\n", - "\n", - " print(splits)\n", - "\n", - " print(splits[0]._hash)\n", - " print(splits[0]._hash)\n", - " \n", + " # Run the 5 tasks in parallel split across 3 processes\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", "\n", - " \n", + " p1, p2, p3, p4, p5 = outputs.out\n", "\n", - " # # Run the 5 tasks in parallel split across 3 processes\n", - " # outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", - 
"\n", - " # p1, p2, p3, p4, p5 = outputs.out\n", - "\n", - " # print(f\"10^5 = {p5}\")" + " print(f\"10^5 = {p5}\")" ] }, { @@ -132,9 +150,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "'ConcurrentFuturesWorker' object is not callable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m ten_to_the_power \u001b[38;5;241m=\u001b[39m TenToThePower()\u001b[38;5;241m.\u001b[39msplit(p\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m6\u001b[39m, \u001b[38;5;241m7\u001b[39m, \u001b[38;5;241m8\u001b[39m, \u001b[38;5;241m9\u001b[39m, \u001b[38;5;241m10\u001b[39m])\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Run the 5 tasks in parallel split across 3 processes\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mten_to_the_power\u001b[49m\u001b[43m(\u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mConcurrentFuturesWorker\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn_procs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m p6, p7, p8, p9, p10 \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mout\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m10^10 = \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp10\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:183\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, **kwargs)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# noqa: F811\u001b[39;00m\n\u001b[1;32m 178\u001b[0m Submitter,\n\u001b[1;32m 179\u001b[0m WORKER_KWARG_FAIL_NOTE,\n\u001b[1;32m 180\u001b[0m )\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mSubmitter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[1;32m 194\u001b[0m result \u001b[38;5;241m=\u001b[39m sub(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:130\u001b[0m, in \u001b[0;36mSubmitter.__init__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, clean_stale_locks, **kwargs)\u001b[0m\n\u001b[1;32m 128\u001b[0m worker_cls \u001b[38;5;241m=\u001b[39m worker\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 130\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_worker \u001b[38;5;241m=\u001b[39m \u001b[43mworker_cls\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 132\u001b[0m e\u001b[38;5;241m.\u001b[39madd_note(WORKER_KWARG_FAIL_NOTE)\n", + "\u001b[0;31mTypeError\u001b[0m: 'ConcurrentFuturesWorker' object is not callable", + "\u001b[0mAttempting to instantiate worker submitter" + ] + } + ], "source": [ "from pydra.engine.workers import ConcurrentFuturesWorker\n", "\n", @@ -151,18 +184,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": [ - "## Cache locations\n", - "\n", - "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", - "task and the operation to be performed. This hash is used to name the output directory for\n", - "the task within the specified cache directory. 
Therefore, if you use the same cache\n", - "directory between runs and in a subsequent run the same task is executed with the same\n", - "inputs then the location of its output directory will also be the same, and the outputs\n", - "generated by the previous run are reused.\n", - "\n", - "For example, using the MrGrid example from the [Getting Started Tutorial](./1-getting-started.html)\n" - ] + "source": [] }, { "cell_type": "code", @@ -202,7 +224,7 @@ "with submitter:\n", " result1 = submitter(mrgrid)\n", "\n", - "print(result1)\n" + "print(result1)" ] }, { diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb new file mode 100644 index 0000000000..e69de29bb2 diff --git a/new-docs/source/tutorial/3-python.ipynb b/new-docs/source/tutorial/4-python.ipynb similarity index 100% rename from new-docs/source/tutorial/3-python.ipynb rename to new-docs/source/tutorial/4-python.ipynb diff --git a/new-docs/source/tutorial/4-shell.ipynb b/new-docs/source/tutorial/5-shell.ipynb similarity index 100% rename from new-docs/source/tutorial/4-shell.ipynb rename to new-docs/source/tutorial/5-shell.ipynb diff --git a/new-docs/source/tutorial/5-workflow.ipynb b/new-docs/source/tutorial/7-workflow.ipynb similarity index 100% rename from new-docs/source/tutorial/5-workflow.ipynb rename to new-docs/source/tutorial/7-workflow.ipynb From 544bbc2c8e4b4bb3973d89ef20afdc30861307b7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 11:32:06 +1100 Subject: [PATCH 149/342] touched up getting started tutorial --- .../source/tutorial/1-getting-started.ipynb | 89 +++++++++---------- pydra/engine/submitter.py | 4 +- pydra/utils/__init__.py | 7 +- pydra/utils/misc.py | 2 + 4 files changed, 52 insertions(+), 50 deletions(-) diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index e2b15bcd71..a4deebd786 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -108,17 +108,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev144+g6a590e9d.d20250124\n" - ] - } - ], + "outputs": [], "source": [ "# Import the task definition\n", "from pydra.tasks.common import LoadJson\n", @@ -154,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -205,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -270,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -306,21 +298,23 @@ "## File-system locations\n", "\n", "Output and intermediate files are typically generated during the course of a workflow/task run.\n", - "In addition to this, Pydra generates a cache directory for each task, to store the\n", - "the task definition and results in pickle files for future reference\n", - "(see [Troubleshooting](./troubleshooting.html)). By default, these task cache directories\n", - "are stored in a platform-specific application cache directory\n", - "\n", - "* Windows: ??\n", - "* Linux: ??\n", + "In addition to this, Pydra generates a cache directory for each task, in which\n", + "the task definition, results and any errors are stored in [cloudpickle](https://github.com/cloudpipe/cloudpickle)\n", + "files for future reference (see [Troubleshooting](./troubleshooting.html)).\n", + "By default, these cache directories are stored in a platform-specific application-cache\n", + "directory\n", + "\n", + "* Windows: `C:\\Users\\\\AppData\\Local\\pydra\\\\run-cache`\n", + "* Linux: `/home//.cache/pydra//run-cache`\n", "* macOS: `/Users//Library/Caches/pydra//run-cache`\n", "\n", "When a task runs, a unique hash is generated by the combination of all the inputs to the\n", "task and the operation to be performed. This hash is used to name the task cache directory\n", - "within the specified cache root directory. Therefore, if you use the same cache\n", + "within the specified cache root. Therefore, if you use the same cache\n", "root and in a subsequent run the same task is executed with the same\n", - "inputs, then the location of its output directory will be the same, and the outputs\n", - "generated by the previous run will be reused.\n", + "inputs, then the path of its cache directory will be the same, and if Pydra finds\n", + "existing results at that path, then the outputs generated by the previous run will be\n", + "reused.\n", "\n", "This cache will grow as more runs are called, therefore care needs to be taken to ensure\n", "there is enough space on the target disk. 
Since the cache will be constantly To specify\n", @@ -329,21 +323,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 19, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'mrgrid' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmrgrid\u001b[49m(cache_root\u001b[38;5;241m=\u001b[39mPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m~/pydra-cache\u001b[39m\u001b[38;5;124m\"\u001b[39m)\u001b[38;5;241m.\u001b[39mexpanduser())\n", - "\u001b[0;31mNameError\u001b[0m: name 'mrgrid' is not defined" - ] - } - ], + "outputs": [], "source": [ "outputs = mrgrid(cache_root=Path(\"~/pydra-cache\").expanduser())" ] @@ -353,21 +335,34 @@ "metadata": {}, "source": [ "To check alternative cache roots, while storing any generated task cache dirs in the \n", - "specified cache root, the `secondary_caches` keyword argument can be used" + "specified cache root, the `cache_locations` keyword argument can be used" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SplitOutputs(out_file=[ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif')], return_code=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], stderr=['\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing 
\"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing 
\"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n'], stdout=['', '', '', '', '', '', '', '', '', ''])\n" + ] + } + ], "source": [ - "from pydra.engine.helpers import default_cache_root\n", + "from pydra.utils import default_run_cache_dir\n", + "\n", + "my_cache_dir = Path(\"~/pydra-cache\").expanduser()\n", + "my_cache_dir.mkdir(exist_ok=True)\n", "\n", "outputs = mrgrid(\n", - " cache_root=Path(\"~/pydra-cache\").expanduser(),\n", - " secondary_caches=[default_cache_root]\n", - ")" + " cache_dir=my_cache_dir,\n", + " cache_locations=[default_run_cache_dir]\n", + ")\n", + "\n", + "print(outputs)" ] }, { diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 40fef6d8f0..fce57938af 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -21,7 +21,7 @@ from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils import user_cache_dir +from pydra.utils import default_run_cache_dir from pydra.design import workflow import logging @@ -104,7 +104,7 @@ def __init__( develop=develop, ) if cache_dir is None: - cache_dir = user_cache_dir / "run-cache" + cache_dir = default_run_cache_dir cache_dir.mkdir(parents=True, exist_ok=True) elif not cache_dir.exists(): raise ValueError(f"Cache directory {str(cache_dir)!r} does not exist") diff --git a/pydra/utils/__init__.py b/pydra/utils/__init__.py index cfde94dbf8..1e36208886 100644 --- a/pydra/utils/__init__.py +++ b/pydra/utils/__init__.py @@ -1 +1,6 @@ -from .misc import user_cache_dir, add_exc_note, exc_info_matches # noqa: F401 +from .misc import ( # noqa: F401 + user_cache_dir, + default_run_cache_dir, + add_exc_note, + exc_info_matches, +) diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index c25c1c9bb8..30d1e29e5b 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -14,6 +14,8 @@ ) ) +default_run_cache_dir = user_cache_dir / "run-cache" + def add_exc_note(e: Exception, note: str) -> Exception: """Adds a note to an exception in a Python <3.11 compatible way From 7d89bb43262636998fb5f7d27d5cc1e397790920 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 13:07:50 +1100 Subject: [PATCH 150/342] cleaning up new docs --- new-docs/source/index.rst | 19 +- .../source/tutorial/1-getting-started.ipynb | 7 +- .../tutorial/2-advanced-execution.ipynb | 362 --------------- .../source/tutorial/2-troubleshooting.ipynb | 31 ++ .../tutorial/3-advanced-execution.ipynb | 439 ++++++++++++++++++ .../source/tutorial/3-troubleshooting.ipynb | 0 .../{7-workflow.ipynb => 6-workflow.ipynb} | 0 ...ical-form.ipynb => 7-canonical-form.ipynb} | 2 +- pydra/engine/submitter.py | 38 +- 9 files changed, 509 insertions(+), 389 deletions(-) delete mode 100644 new-docs/source/tutorial/2-advanced-execution.ipynb create mode 100644 new-docs/source/tutorial/2-troubleshooting.ipynb create mode 100644 new-docs/source/tutorial/3-advanced-execution.ipynb delete mode 100644 new-docs/source/tutorial/3-troubleshooting.ipynb rename new-docs/source/tutorial/{7-workflow.ipynb => 6-workflow.ipynb} (100%) rename new-docs/source/tutorial/{6-canonical-form.ipynb => 7-canonical-form.ipynb} (98%) diff --git 
a/new-docs/source/index.rst b/new-docs/source/index.rst index dd16cde900..2b12f4bbc6 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -13,7 +13,7 @@ shell commands or Python functions. * Combine diverse tasks (`Python functions <./tutorial/3-python.html>`__ or `shell commands <./tutorial/4-shell.html>`__) into coherent, robust `workflows <./tutorial/5-workflow.html>`__ * Dynamic workflow construction using Python code (see :ref:`Dynamic construction`) -* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/2-advanced-execution.html#Workers>`__ +* Concurrent execution on `choice of computing platform (e.g. workstation, SLURM, SGE, Dask, etc...) <./tutorial/3-advanced-execution.html#Workers>`__ * Map-reduce-like semantics (see :ref:`Splitting and combining`) * Global caching to reduce recomputation (see :ref:`Caches and hashes`) * Tasks can be executed in separate software environments, e.g. containers (see :ref:`Software environments`) @@ -72,7 +72,8 @@ Execution Learn how to execute existing tasks (including workflows) on different systems * :ref:`Getting started` -* :ref:`Execution options` +* :ref:`Troubleshooting` +* :ref:`Advanced execution` Design ~~~~~~ @@ -119,17 +120,18 @@ See the full reference documentation for Pydra :hidden: tutorial/1-getting-started - tutorial/2-advanced-execution + tutorial/2-troubleshooting + tutorial/3-advanced-execution .. toctree:: :maxdepth: 2 :caption: Tutorials: Design :hidden: - tutorial/3-python - tutorial/4-shell - tutorial/5-workflow - tutorial/6-canonical-form + tutorial/4-python + tutorial/5-shell + tutorial/6-workflow + tutorial/7-canonical-form .. toctree:: @@ -154,12 +156,13 @@ See the full reference documentation for Pydra :caption: Explanation :hidden: + explanation/design-approach explanation/splitting-combining explanation/conditional-lazy explanation/environments explanation/hashing-caching explanation/typing - explanation/design-approach + .. toctree:: :maxdepth: 2 diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index a4deebd786..a7a1b1ca22 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -251,8 +251,9 @@ "By default, Pydra will use the *debug* worker, which executes each task sequentially.\n", "This makes it easier to debug tasks and workflows, however, in most cases, once a workflow\n", "is tested, a concurrent worker is preferable so tasks can be executed in parallel\n", - "(see [Workers](./2-advanced-execution.html#Workers)). To use multiple processes on a\n", - "workstation, select the `cf` worker option when executing the task/workflow.\n", + "(see [Workers](./3-advanced-execution.html#Workers)). To use multiple processes on a\n", + "workstation, select the `cf` worker option when executing the task/workflow. Additional\n", + "keyword arguments, will be passed to the worker initialisation (e.g. 
`n_procs=4`).\n", "\n", "Note that when multiprocessing in Python on Windows and macOS (and good practice on Linux/POSIX\n", "OSs for compatibility), you need to place a `if __name__ == \"__main__\"` block when\n", @@ -287,7 +288,7 @@ "\n", "if __name__ == \"__main__\": # <-- Add this block to allow the script to imported by subprocesses\n", " mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5,0.5,0.5)).split(in_file=nifti_dir.iterdir())\n", - " outputs = mrgrid(worker=\"cf\") # <-- Select the \"cf\" worker here\n", + " outputs = mrgrid(worker=\"cf\", n_procs=4) # <-- Select the \"cf\" worker here\n", " print(\"\\n\".join(str(p) for p in outputs.out_file))" ] }, diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb deleted file mode 100644 index 510dfdf6ad..0000000000 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ /dev/null @@ -1,362 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Execution options\n", - "\n", - "One of the key design features of Pydra is the separation between the parameterisation of\n", - "the task to be executed, and the parameresiation of where and how the task should be\n", - "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", - "some of the available options for executing a task.\n", - "\n", - "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import nest_asyncio\n", - "nest_asyncio.apply()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Submitter\n", - "\n", - "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import python\n", - "\n", - "@python.define\n", - "def TenToThePower(p: int) -> int:\n", - " return 10 ** p" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.engine.submitter import Submitter\n", - "\n", - "ten_to_the_power = TenToThePower(p=3)\n", - "\n", - "with Submitter() as submitter:\n", - " result = submitter(ten_to_the_power)\n", - "\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `Result` object contains\n", - "\n", - "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", - "* `runtime`: information about the peak memory and CPU usage\n", - "* `errored`: the error status of the task\n", - "* `task`: the task object that generated the results\n", - "* `output_dir`: the output directory the results are stored in" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workers\n", - "\n", - "Pydra supports several workers with which to execute tasks\n", - "\n", - "- `debug` (default)\n", - "- `cf`\n", - "- `slurm`\n", - "- `sge`\n", - "- `psij`\n", - "- `dask` (experimental)\n", - "\n", - "By default, the *debug* worker is used, which runs tasks serially in a single process\n", - "without use of the `asyncio` module. 
This makes it easier to debug errors in workflows\n", - "and python tasks, however, when using in Pydra in production you will typically want to\n", - "parallelise the execution for efficiency.\n", - "\n", - "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", - "option because it is able to spread the tasks to be run over multiple processes and\n", - "maximise CPU usage.\n", - "\n", - "If you have access to a high-performance cluster (HPC) then\n", - "the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", - "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", - "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", - "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", - "range of execution backends to choose from.\n", - "\n", - "To specify a worker, the abbreviation can be passed either as a string or using the\n", - "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", - "arguments to the execution call. For example, if we wanted to run five tasks using the\n", - "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", - "call." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10^5 = 100000\n" - ] - } - ], - "source": [ - "from pydra.design import python\n", - "\n", - "if __name__ == \"__main__\":\n", - "\n", - " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", - "\n", - " # Run the 5 tasks in parallel split across 3 processes\n", - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", - "\n", - " p1, p2, p3, p4, p5 = outputs.out\n", - "\n", - " print(f\"10^5 = {p5}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "'ConcurrentFuturesWorker' object is not callable", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m ten_to_the_power \u001b[38;5;241m=\u001b[39m TenToThePower()\u001b[38;5;241m.\u001b[39msplit(p\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m6\u001b[39m, \u001b[38;5;241m7\u001b[39m, \u001b[38;5;241m8\u001b[39m, \u001b[38;5;241m9\u001b[39m, \u001b[38;5;241m10\u001b[39m])\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Run the 5 tasks in parallel split across 3 processes\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mten_to_the_power\u001b[49m\u001b[43m(\u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mConcurrentFuturesWorker\u001b[49m\u001b[43m(\u001b[49m\u001b[43mn_procs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m p6, p7, p8, p9, p10 \u001b[38;5;241m=\u001b[39m outputs\u001b[38;5;241m.\u001b[39mout\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m10^10 = 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp10\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:183\u001b[0m, in \u001b[0;36mTaskDef.__call__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, **kwargs)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mengine\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msubmitter\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ( \u001b[38;5;66;03m# noqa: F811\u001b[39;00m\n\u001b[1;32m 178\u001b[0m Submitter,\n\u001b[1;32m 179\u001b[0m WORKER_KWARG_FAIL_NOTE,\n\u001b[1;32m 180\u001b[0m )\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 183\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mSubmitter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 184\u001b[0m \u001b[43m \u001b[49m\u001b[43maudit_flags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudit_flags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 186\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_locations\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_locations\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 187\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessenger_args\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessenger_args\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessengers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmessengers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[43m \u001b[49m\u001b[43menvironment\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menvironment\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 191\u001b[0m \u001b[43m \u001b[49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 192\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[1;32m 194\u001b[0m result \u001b[38;5;241m=\u001b[39m sub(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:130\u001b[0m, in \u001b[0;36mSubmitter.__init__\u001b[0;34m(self, cache_dir, worker, environment, rerun, cache_locations, audit_flags, messengers, messenger_args, clean_stale_locks, **kwargs)\u001b[0m\n\u001b[1;32m 128\u001b[0m worker_cls \u001b[38;5;241m=\u001b[39m worker\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 130\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_worker \u001b[38;5;241m=\u001b[39m \u001b[43mworker_cls\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 131\u001b[0m 
\u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 132\u001b[0m e\u001b[38;5;241m.\u001b[39madd_note(WORKER_KWARG_FAIL_NOTE)\n", - "\u001b[0;31mTypeError\u001b[0m: 'ConcurrentFuturesWorker' object is not callable", - "\u001b[0mAttempting to instantiate worker submitter" - ] - } - ], - "source": [ - "from pydra.engine.workers import ConcurrentFuturesWorker\n", - "\n", - "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", - "\n", - "# Run the 5 tasks in parallel split across 3 processes\n", - "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", - "\n", - "p6, p7, p8, p9, p10 = outputs.out\n", - "\n", - "print(f\"10^10 = {p10}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import tempfile\n", - "from fileformats.medimage import Nifti1\n", - "from pydra.engine.submitter import Submitter\n", - "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", - "\n", - "# Make directory filled with nifti files\n", - "test_dir = Path(tempfile.mkdtemp())\n", - "nifti_dir = test_dir / \"nifti\"\n", - "nifti_dir.mkdir()\n", - "for i in range(10):\n", - " Nifti1.sample(nifti_dir, seed=i)\n", - "\n", - "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", - "# by splitting the \"input\" input field over all files in the directory\n", - "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", - " in_file=nifti_dir.iterdir()\n", - ")\n", - "\n", - "# Run the task to resample all NIfTI files\n", - "outputs = mrgrid()\n", - "\n", - "# Create a new custom directory\n", - "cache_dir = test_dir / \"cache\"\n", - "cache_dir.mkdir()\n", - "\n", - "submitter = Submitter(cache_dir=cache_dir)\n", - "\n", - "# Run the task to resample all NIfTI files with different voxel sizes\n", - "with submitter:\n", - " result1 = submitter(mrgrid)\n", - "\n", - "print(result1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we attempt to run the same task with the same parameterisation the cache directory\n", - "will point to the same location and the results will be reused" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mrgrid_varying_vox_sizes2 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result2 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result2.output_dir == result1.output_dir\n", - "\n", - "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", - "mrgrid_varying_vox_sizes2.inputs.voxel[2] = [0.25]\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# The output directory will be different as the inputs are now different\n", - "assert result3.output_dir != result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for file objects, the contents of the 
files are used to calculate the hash\n", - "not their paths. Therefore, when inputting large files there might be some additional\n", - "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", - "shouldn't need to be recalculated unless they are modified). However, this makes the\n", - "hashes invariant to file-system movement. For example, changing the name of one of the\n", - "files in the nifti directory won't invalidate the hash." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "first_file.rename(first_file.with_name(\"first.nii.gz\"))\n", - "\n", - "mrgrid_varying_vox_sizes3 = MrGrid().split(\n", - " (\"input\", \"voxel\"),\n", - " input=nifti_dir.iterdir(),\n", - " voxel=VOXEL_SIZES\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result4 = submitter(mrgrid_varying_vox_sizes2)\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result4.output_dir == result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "See [Caches and hashes](../explanation/hashing-caching.html) for more details on how inputs\n", - "are hashed for caching and issues to consider." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environments\n", - "\n", - "Work in progress...\n", - "\n", - "See [Containers and Environments](../explanation/environments.rst) for more details." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Provenance and auditing\n", - "\n", - "Work in progress..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/new-docs/source/tutorial/2-troubleshooting.ipynb b/new-docs/source/tutorial/2-troubleshooting.ipynb new file mode 100644 index 0000000000..1fe08f938a --- /dev/null +++ b/new-docs/source/tutorial/2-troubleshooting.ipynb @@ -0,0 +1,31 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Troubleshooting\n", + "\n", + "Failures are common in scientific analysis, even for well-tested workflows, due to\n", + "the novel nature of scientific experiments and the known artefacts that can occur.\n", + "Therefore, it is always worth sanity-checking the results produced by workflows. When a problem\n", + "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", + "issue occurred.\n", + "\n", + "Work in progress..."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/3-advanced-execution.ipynb b/new-docs/source/tutorial/3-advanced-execution.ipynb new file mode 100644 index 0000000000..b537a88b08 --- /dev/null +++ b/new-docs/source/tutorial/3-advanced-execution.ipynb @@ -0,0 +1,439 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced execution\n", + "\n", + "One of the key design features of Pydra is the separation between the parameterisation of\n", + "the task to be executed, and the parameterisation of where and how the task should be\n", + "executed (e.g. on the cloud, on an HPC cluster, ...). This tutorial steps you through\n", + "some of the available options for executing a task.\n", + "\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submitter\n", + "\n", + "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "@python.define\n", + "def TenToThePower(p: int) -> int:\n", + " return 10 ** p" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev144+g6a590e9d.d20250124\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result(output_dir=PosixPath('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/TenToThePower_72982a38b5a17142cb2186803fe6b238'), outputs=TenToThePowerOutputs(out=1000), runtime=None, errored=False)\n" + ] + } + ], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "\n", + "ten_to_the_power = TenToThePower(p=3)\n", + "\n", + "with Submitter() as submitter:\n", + " result = submitter(ten_to_the_power)\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workers\n", + "\n", + "Pydra supports several workers with which to execute tasks\n", + "\n", + "- `debug` (default)\n", + "- `cf`\n", + "- `slurm`\n", + "- `sge`\n", + "- `psij`\n", + "- `dask` (experimental)\n", + "\n", + "By default, the *debug* worker is used, which runs tasks serially in a single process\n", + "without use of the `asyncio` module. 
This makes it easier to debug errors in workflows\n", + "and Python tasks; however, when using Pydra in production you will typically want to\n", + "parallelise the execution for efficiency.\n", + "\n", + "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", + "option because it is able to spread the tasks to be run over multiple processes and\n", + "maximise CPU usage.\n", + "\n", + "If you have access to a high-performance cluster (HPC) then\n", + "the [SLURM](https://slurm.schedmd.com/documentation.html),\n", + "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", + "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", + "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", + "range of execution backends to choose from.\n", + "\n", + "To specify a worker, the abbreviation can be passed either as a string or using the\n", + "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", + "arguments to the execution call. For example, if we wanted to run five tasks using the\n", + "ConcurrentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", + "call." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10^5 = 100000\n" + ] + } + ], + "source": [ + "from pydra.design import python\n", + "\n", + "if __name__ == \"__main__\":\n", + "\n", + " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", + "\n", + " # Run the 5 tasks in parallel split across 3 processes\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + "\n", + " p1, p2, p3, p4, p5 = outputs.out\n", + "\n", + " print(f\"10^5 = {p5}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10^10 = 10000000000\n" + ] + } + ], + "source": [ + "from pydra.engine.workers import ConcurrentFuturesWorker\n", + "\n", + "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", + "\n", + "# Run the 5 tasks in parallel split across 3 processes\n", + "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", + "\n", + "p6, p7, p8, p9, p10 = outputs.out\n", + "\n", + "print(f\"10^10 = {p10}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reusing previously generated results\n", + "\n", + "Pydra caches all task results in the runtime cache (see [File-system locations](./1-getting-started.html#File-system-locations))\n", + "as long as the hashes of the inputs provided to the task are exactly the same. Here we\n", + "go through some of the practicalities of this caching and hashing (see\n", + "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", + "to consider)."
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Result(output_dir=PosixPath('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/Split_23f11b08e0449f5c5ee0a2756aeee2ea'), outputs=SplitOutputs(out_file=[ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif')], return_code=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], stderr=['\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 
25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing 
\"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] 
reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] 
reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n'], stdout=['', '', '', '', '', '', '', '', '', '']), runtime=None, errored=False)\n" + ] + } + ], + "source": [ + "from pathlib import Path\n", + "import tempfile\n", + "from fileformats.medimage import Nifti1\n", + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "# Make 
directory filled with nifti files\n", + "test_dir = Path(tempfile.mkdtemp())\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "for i in range(10):\n", + " Nifti1.sample(nifti_dir, seed=i)\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", + " in_file=nifti_dir.iterdir()\n", + ")\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid()\n", + "\n", + "# Create a new custom directory\n", + "cache_dir = test_dir / \"cache\"\n", + "cache_dir.mkdir()\n", + "\n", + "submitter = Submitter(cache_dir=cache_dir)\n", + "\n", + "# Run the task to resample all NIfTI files with different voxel sizes\n", + "with submitter:\n", + " result1 = submitter(mrgrid)\n", + "\n", + "print(result1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import copy\n", + "\n", + "VOX_SIZES = [\n", + " (0.5, 0.5, 0.5),\n", + " (0.25, 0.25, 0.25),\n", + " (0.1, 0.1, 0.1),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " (0.5, 0.5, 0.5),\n", + " (0.25, 0.25, 0.25),\n", + " (0.2, 0.2, 0.2),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " ]\n", + "\n", + "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox)\n", + "\n", + "\n", + "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=copy(VOX_SIZES),\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result2.output_dir == result1.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox2.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result3.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "AssertionError", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[8], line 34\u001b[0m\n\u001b[1;32m 31\u001b[0m result4 \u001b[38;5;241m=\u001b[39m submitter(mrgrid_varying_vox4)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# The cache directory for the new run is different \u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m result4\u001b[38;5;241m.\u001b[39moutput_dir \u001b[38;5;241m!=\u001b[39m result1\u001b[38;5;241m.\u001b[39moutput_dir\n", + "\u001b[0;31mAssertionError\u001b[0m: " + ] + } + ], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "new_name = first_file.with_name(\"first.nii\")\n", + "first_file.rename(new_name)\n", + "\n", + "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox3)\n", + "\n", + "assert result3.output_dir == result1.output_dir\n", + "\n", + "# Replace the first NIfTI file with a new file\n", + "new_name.unlink()\n", + "Nifti1.sample(nifti_dir, seed=100)\n", + "\n", + "# Update the in_file input field to include the new file\n", + "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# The results from the previous runs are ignored as the files have changed\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox4)\n", + "\n", + "# The cache directory for the new run is different \n", + "assert result4.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environments\n", + "\n", + "Work in progress...\n", + "\n", + "See [Containers and Environments](../explanation/environments.rst) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provenance and auditing\n", + "\n", + "Work in progress..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/new-docs/source/tutorial/7-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb similarity index 100% rename from new-docs/source/tutorial/7-workflow.ipynb rename to new-docs/source/tutorial/6-workflow.ipynb diff --git a/new-docs/source/tutorial/6-canonical-form.ipynb b/new-docs/source/tutorial/7-canonical-form.ipynb similarity index 98% rename from new-docs/source/tutorial/6-canonical-form.ipynb rename to new-docs/source/tutorial/7-canonical-form.ipynb index 4472f4d9c0..b220b3d5d9 100644 --- a/new-docs/source/tutorial/6-canonical-form.ipynb +++ b/new-docs/source/tutorial/7-canonical-form.ipynb @@ -10,7 +10,7 @@ "`pydra.design.*.define` decorators/functions are translated to\n", "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", - "in the [Python-tasks](./3-python.html), [Shell-tasks](./4-shell.html) and [Workflow](./5-workflow.html)\n", + "in the [Python-tasks](./4-python.html), [Shell-tasks](./5-shell.html) and [Workflow](./6-workflow.html)\n", "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", "for linters follow. Therefore, when designing task definitions to be used by third\n", "parties (e.g. 
`pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index fce57938af..60a949f752 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -114,23 +114,31 @@ def __init__( self.rerun = rerun self.loop = get_open_loop() self._own_loop = not self.loop.is_running() - if isinstance(worker, str): - self.worker_name = worker - try: - worker_cls = WORKERS[self.worker_name] - except KeyError: - raise NotImplementedError(f"No worker for '{self.worker_name}' plugin") + if isinstance(worker, Worker): + self._worker = worker + self.worker_name = worker.plugin_name else: + if isinstance(worker, str): + self.worker_name = worker + try: + worker_cls = WORKERS[self.worker_name] + except KeyError: + raise NotImplementedError( + f"No worker for '{self.worker_name}' plugin" + ) + else: + try: + self.worker_name = worker.plugin_name + except AttributeError: + raise ValueError( + "Worker class must have a 'plugin_name' str attribute" + ) + worker_cls = worker try: - self.worker_name = worker.plugin_name - except AttributeError: - raise ValueError("Worker class must have a 'plugin_name' str attribute") - worker_cls = worker - try: - self._worker = worker_cls(**kwargs) - except TypeError as e: - e.add_note(WORKER_KWARG_FAIL_NOTE) - raise + self._worker = worker_cls(**kwargs) + except TypeError as e: + e.add_note(WORKER_KWARG_FAIL_NOTE) + raise self.run_start_time = None self.clean_stale_locks = ( clean_stale_locks From 94cee3e73850ee702bf2030bb2d1d850cc19b070 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 13:45:18 +1100 Subject: [PATCH 151/342] fixed test import errors --- pydra/design/tests/test_workflow.py | 3 +- pydra/engine/tests/test_environments.py | 4 +- pydra/engine/tests/test_helpers.py | 1 - .../engine/tests/test_shelltask_inputspec.py | 7 +- pydra/engine/tests/test_singularity.py | 3 +- pydra/engine/tests/test_specs.py | 5 +- pydra/engine/tests/test_state.py | 319 +++++++++++++----- pydra/engine/tests/test_task.py | 2 +- pydra/utils/tests/test_typing.py | 4 +- 9 files changed, 239 insertions(+), 109 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 086155d8ba..d77832c7bf 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -2,8 +2,7 @@ from copy import copy import pytest import attrs -from pydra.engine.workflow.base import Workflow -from pydra.engine.workflow.lazy import LazyInField, LazyOutField +from pydra.engine.lazy import LazyInField, LazyOutField import typing as ty from pydra.design import shell, python, workflow from pydra.engine.helpers import list_fields diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 3203d30272..2b2d036c6c 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -3,9 +3,7 @@ from ..environments import Native, Docker, Singularity from ..task import ShellTask from ..submitter import Submitter -from ..specs import ( - File, -) +from fileformats.generic import File from pydra.design import shell from .utils import no_win, need_docker, need_singularity import pytest diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 69e7cc71ad..14bb1203ad 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -15,7 +15,6 @@ save, load_and_run, position_sort, - parse_copyfile, parse_format_string, ) from 
pydra.utils.hash import hash_function diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 78815655ce..0a54ce83a2 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -4,11 +4,8 @@ import pytest from ..task import ShellTask -from pydra.engine.specs import ( - ShellOutputs, - ShellDef, - File, -) +from pydra.engine.specs import ShellOutputs, ShellDef +from fileformats.generic import File from pydra.design import shell diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index afde5397e8..0d8cd4cb07 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -5,7 +5,8 @@ from ..task import ShellTask from ..submitter import Submitter -from ..specs import ShellOutputs, File, ShellDef +from ..specs import ShellOutputs, ShellDef +from fileformats.generic import File from ..environments import Singularity diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 12c13647a0..544b7570d2 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -5,14 +5,13 @@ # from copy import deepcopy import time - +from fileformats.generic import File from ..specs import ( - File, Runtime, Result, ShellDef, ) -from pydra.engine.workflow.lazy import ( +from pydra.engine.lazy import ( LazyInField, LazyOutField, ) diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index c8ef0941ca..8339bdc2e6 100644 --- a/pydra/engine/tests/test_state.py +++ b/pydra/engine/tests/test_state.py @@ -1,6 +1,8 @@ import pytest from ..state import State +from pydra.design import python +from pydra.engine.specs import PythonDef, PythonOutputs from ..helpers_state import PydraStateError, add_name_splitter @@ -78,7 +80,7 @@ def test_state_1( inputs, splitter, ndim, states_ind, states_val, group_for_inputs, groups_stack ): """single state: testing groups, prepare_states and prepare_inputs""" - st = State(name="NA", splitter=splitter) + st = State(definition=example_def, name="NA", splitter=splitter) assert st.splitter == st.current_splitter assert st.splitter_rpn == st.current_splitter_rpn assert st.prev_state_splitter is None @@ -476,8 +478,8 @@ def test_state_connect_1(): """two 'connected' states: testing groups, prepare_states and prepare_inputs no explicit splitter for the second state """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] assert st2.prev_state_splitter == st2.splitter @@ -499,8 +501,13 @@ def test_state_connect_1a(): """two 'connected' states: testing groups, prepare_states and prepare_inputs the second state has explicit splitter from the first one (the prev-state part) """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="_NA", other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter="_NA", + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -516,8 +523,8 @@ def test_state_connect_1a(): def test_state_connect_1b_exception(): """can't provide explicitly NA.a (should be _NA)""" - State(name="NA", 
splitter="a", other_states={}) - st2 = State(name="NB", splitter="NA.a") + State(definition=example_def, name="NA", splitter="a", other_states={}) + st2 = State(definition=example_def, name="NB", splitter="NA.a") with pytest.raises(PydraStateError) as excinfo: st2.splitter_validation() assert "consider using _NA" in str(excinfo.value) @@ -527,7 +534,12 @@ def test_state_connect_1b_exception(): def test_state_connect_1c_exception(splitter2, other_states2): """can't ask for splitter from node that is not connected""" with pytest.raises(PydraStateError): - st2 = State(name="NB", splitter=splitter2, other_states=other_states2) + st2 = State( + definition=example_def, + name="NB", + splitter=splitter2, + other_states=other_states2, + ) st2.splitter_validation() @@ -536,8 +548,13 @@ def test_state_connect_2(): the second state has explicit splitter that contains splitter from the first node and a new field (the prev-state and current part) """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["_NA", "a"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -580,8 +597,13 @@ def test_state_connect_2a(): splitter from the first node and a new field; adding an additional scalar field that is not part of the splitter """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["_NA", "a"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -618,8 +640,10 @@ def test_state_connect_2b(): the second state has explicit splitter with a new field (the current part) splitter from the first node (the prev-state part) has to be added """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a", other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, name="NB", splitter="a", other_states={"NA": (st1, "b")} + ) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -656,9 +680,13 @@ def test_state_connect_3(): the third state connected to two previous states; splitter from the previous states (the prev-state part) has to be added """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") - st3 = State(name="NC", other_states={"NA": (st1, "b"), "NB": (st2, "c")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") + st3 = State( + definition=example_def, + name="NC", + other_states={"NA": (st1, "b"), "NB": (st2, "c")}, + ) assert st3.splitter == ["_NA", "_NB"] assert st3.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -698,9 +726,10 @@ def test_state_connect_3a(): the third state connected to two previous states; the third state has explicit splitter that contains splitters from previous states """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( + definition=example_def, name="NC", 
splitter=["_NA", "_NB"], other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -740,10 +769,13 @@ def test_state_connect_3b(): the third state has explicit splitter that contains splitter only from the first state. splitter from the second state has to be added (partial prev-state part) """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( - name="NC", splitter="_NB", other_states={"NA": (st1, "b"), "NB": (st2, "c")} + definition=example_def, + name="NC", + splitter="_NB", + other_states={"NA": (st1, "b"), "NB": (st2, "c")}, ) assert st3.splitter == ["_NA", "_NB"] @@ -779,9 +811,10 @@ def test_state_connect_4(): the third state connected to two previous states; the third state has explicit scalar(!) splitter that contains two previous states """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( + definition=example_def, name="NC", splitter=("_NA", "_NB"), other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -810,8 +843,8 @@ def test_state_connect_5(): the first state has outer splitter, the second state has no explicit splitter """ - st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", other_states={"NA": (st1, "a")}) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "a")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -840,9 +873,10 @@ def test_state_connect_6(): the first state has outer splitter, the third state has explicit splitter with splitters from previous states """ - st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( + definition=example_def, name="NC", splitter=["_NA", "_NB"], other_states={"NA": (st1, "a"), "NB": (st2, "b")}, @@ -893,9 +927,13 @@ def test_state_connect_6a(): the first state has outer splitter, the third state has no explicit splitter """ - st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", splitter="a") - st3 = State(name="NC", other_states={"NA": (st1, "a"), "NB": (st2, "b")}) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st2 = State(definition=example_def, name="NB", splitter="a") + st3 = State( + definition=example_def, + name="NC", + other_states={"NA": (st1, "a"), "NB": (st2, "b")}, + ) assert st3.splitter == ["_NA", "_NB"] assert st3.splitter_rpn == ["NA.a", "NA.b", "*", "NB.a", "*"] @@ -940,8 +978,10 @@ def test_state_connect_7(): """two 'connected' states with multiple fields that are connected no explicit splitter for the second state """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", other_states={"NA": (st1, ["x", "y"])}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, name="NB", other_states={"NA": (st1, ["x", "y"])} + ) # should take into account that x, y come from the same task assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -966,9 +1006,13 @@ def test_state_connect_8(): pydra should recognize, that there is only one splitter - NA and it should give the same as the 
previous test """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", other_states={"NA": (st1, "b")}) - st3 = State(name="NC", other_states={"NA": (st1, "x"), "NB": (st2, "y")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) + st3 = State( + definition=example_def, + name="NC", + other_states={"NA": (st1, "x"), "NB": (st2, "y")}, + ) # x comes from NA and y comes from NB, but NB has only NA's splitter, # so it should be treated as both inputs are from NA state assert st3.splitter == "_NA" @@ -997,10 +1041,18 @@ def test_state_connect_9(): pydra should recognize, that there is only one splitter - NA_1 and NA_2 """ - st1 = State(name="NA_1", splitter="a") - st1a = State(name="NA_2", splitter="a") - st2 = State(name="NB", other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}) - st3 = State(name="NC", other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}) + st1 = State(definition=example_def, name="NA_1", splitter="a") + st1a = State(definition=example_def, name="NA_2", splitter="a") + st2 = State( + definition=example_def, + name="NB", + other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}, + ) + st3 = State( + definition=example_def, + name="NC", + other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}, + ) # x comes from NA_1 and y comes from NB, but NB has only NA_1/2's splitters, assert st3.splitter == ["_NA_1", "_NA_2"] assert st3.splitter_rpn == ["NA_1.a", "NA_2.a", "*"] @@ -1032,8 +1084,13 @@ def test_state_connect_innerspl_1(): """two 'connected' states: testing groups, prepare_states and prepare_inputs, the second state has an inner splitter, full splitter provided """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["_NA", "b"], other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["_NA", "b"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", "NB.b"] assert st2.splitter_rpn == ["NA.a", "NB.b", "*"] @@ -1084,8 +1141,10 @@ def test_state_connect_innerspl_1a(): the second state has an inner splitter, splitter from the first state (the prev-state part) has to be added """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="b", other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, name="NB", splitter="b", other_states={"NA": (st1, "b")} + ) assert st2.splitter == ["_NA", "NB.b"] assert st2.splitter_rpn == ["NA.a", "NB.b", "*"] @@ -1135,8 +1194,13 @@ def test_state_connect_innerspl_1a(): def test_state_connect_innerspl_1b(): """incorrect splitter - the current & prev-state parts in scalar splitter""" with pytest.raises(PydraStateError): - st1 = State(name="NA", splitter="a") - State(name="NB", splitter=("_NA", "b"), other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + State( + definition=example_def, + name="NB", + splitter=("_NA", "b"), + other_states={"NA": (st1, "b")}, + ) def test_state_connect_innerspl_2(): @@ -1144,8 +1208,13 @@ def test_state_connect_innerspl_2(): the second state has one inner splitter and one 'normal' splitter only the current part of the splitter provided (the prev-state has to be added) """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", 
splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["c", "b"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] assert st2.splitter_rpn == ["NA.a", "NB.c", "NB.b", "*", "*"] @@ -1215,8 +1284,13 @@ def test_state_connect_innerspl_2a(): only the current part of the splitter provided (different order!), """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["b", "c"], other_states={"NA": (st1, "b")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["b", "c"], + other_states={"NA": (st1, "b")}, + ) assert st2.splitter == ["_NA", ["NB.b", "NB.c"]] assert st2.splitter_rpn == ["NA.a", "NB.b", "NB.c", "*", "*"] @@ -1282,9 +1356,16 @@ def test_state_connect_innerspl_3(): the prev-state parts of the splitter have to be added """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}) - st3 = State(name="NC", splitter="d", other_states={"NB": (st2, "a")}) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + splitter=["c", "b"], + other_states={"NA": (st1, "b")}, + ) + st3 = State( + definition=example_def, name="NC", splitter="d", other_states={"NB": (st2, "a")} + ) assert st3.splitter == ["_NB", "NC.d"] assert st3.splitter_rpn == ["NA.a", "NB.c", "NB.b", "*", "*", "NC.d", "*"] @@ -1421,8 +1502,8 @@ def test_state_connect_innerspl_4(): """three'connected' states: testing groups, prepare_states and prepare_inputs, the third one connected to two previous, only the current part of splitter provided """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=["b", "c"]) + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter=["b", "c"]) st3 = State( name="NC", splitter="d", other_states={"NA": (st1, "e"), "NB": (st2, "f")} ) @@ -1506,7 +1587,7 @@ def test_state_connect_innerspl_4(): def test_state_combine_1(): """single state with splitter and combiner""" - st = State(name="NA", splitter="a", combiner="a") + st = State(definition=example_def, name="NA", splitter="a", combiner="a") assert st.splitter == "NA.a" assert st.splitter_rpn == ["NA.a"] assert st.current_combiner == st.current_combiner_all == st.combiner == ["NA.a"] @@ -1526,8 +1607,8 @@ def test_state_combine_1(): def test_state_connect_combine_1(): """two connected states; outer splitter and combiner in the first one""" - st1 = State(name="NA", splitter=["a", "b"], combiner="a") - st2 = State(name="NB", other_states={"NA": (st1, "c")}) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") + st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1571,8 +1652,10 @@ def test_state_connect_combine_2(): two connected states; outer splitter and combiner in the first one; additional splitter in the second node """ - st1 = State(name="NA", splitter=["a", "b"], combiner="a") - st2 = State(name="NB", splitter="d", other_states={"NA": (st1, "c")}) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") + st2 = State( + definition=example_def, name="NB", splitter="d", other_states={"NA": (st1, "c")} + ) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1633,8 +1716,14 
@@ def test_state_connect_combine_3(): two connected states; outer splitter and combiner in the first one; additional splitter in the second node """ - st1 = State(name="NA", splitter=["a", "b"], combiner="a") - st2 = State(name="NB", splitter="d", combiner="d", other_states={"NA": (st1, "c")}) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") + st2 = State( + definition=example_def, + name="NB", + splitter="d", + combiner="d", + other_states={"NA": (st1, "c")}, + ) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1698,7 +1787,7 @@ def test_state_connect_combine_3(): def test_state_connect_innerspl_combine_1(): """one previous node and one inner splitter (and inner splitter combiner); only current part provided - the prev-state part had to be added""" - st1 = State(name="NA", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") st2 = State( name="NB", splitter=["c", "b"], combiner=["b"], other_states={"NA": (st1, "b")} ) @@ -1779,7 +1868,7 @@ def test_state_connect_innerspl_combine_2(): only the current part of the splitter provided, the prev-state part has to be added """ - st1 = State(name="NA", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") st2 = State( name="NB", splitter=["c", "b"], combiner=["c"], other_states={"NA": (st1, "b")} ) @@ -1855,8 +1944,13 @@ def test_state_connect_combine_prevst_1(): the second has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State( + definition=example_def, + name="NB", + other_states={"NA": (st1, "b")}, + combiner="NA.a", + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] assert ( @@ -1885,8 +1979,13 @@ def test_state_connect_combine_prevst_2(): the second has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a") + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st2 = State( + definition=example_def, + name="NB", + other_states={"NA": (st1, "b")}, + combiner="NA.a", + ) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] assert st2.combiner == ["NA.a"] @@ -1921,9 +2020,14 @@ def test_state_connect_combine_prevst_3(): the third one has combiner from the first state (i.e. 
from the prev-state part of the splitter), """ - st1 = State(name="NA", splitter=["a", "b"]) - st2 = State(name="NB", other_states={"NA": (st1, "b")}) - st3 = State(name="NC", other_states={"NB": (st2, "c")}, combiner="NA.a") + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) + st3 = State( + definition=example_def, + name="NC", + other_states={"NB": (st2, "c")}, + combiner="NA.a", + ) assert st3.splitter == "_NB" assert st3.splitter_rpn == ["NA.a", "NA.b", "*"] assert st3.combiner == ["NA.a"] @@ -1957,8 +2061,8 @@ def test_state_connect_combine_prevst_4(): the third state has only the prev-state part of splitter, the third state has also combiner from the prev-state part """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -2009,8 +2113,8 @@ def test_state_connect_combine_prevst_5(): the third state has scalar splitter in the prev-state part, the third state has also combiner from the prev-state part """ - st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter="a") + st1 = State(definition=example_def, name="NA", splitter="a") + st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( name="NC", splitter=("_NA", "_NB"), @@ -2043,7 +2147,7 @@ def test_state_connect_combine_prevst_6(): the second also has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(name="NA", splitter=["a", "b"]) + st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) st2 = State( name="NB", splitter="c", other_states={"NA": (st1, "b")}, combiner="NA.a" ) @@ -2098,20 +2202,42 @@ def test_state_connect_combine_prevst_6(): ] +@python.define +class ExampleDef(PythonDef["ExampleDef.Outputs"]): + + a: int + b: int + + class Outputs(PythonOutputs): + c: int + + def function(self): + return self.Outputs(c=self.inputs.a + self.inputs.b) + + +example_def = ExampleDef(a=1, b=2) + + @pytest.mark.parametrize( "splitter, other_states, expected_splitter, expected_prevst, expected_current", [ - (None, {"NA": (State(name="NA", splitter="a"), "b")}, "_NA", "_NA", None), + ( + None, + {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + "_NA", + "_NA", + None, + ), ( "b", - {"NA": (State(name="NA", splitter="a"), "b")}, + {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, ["_NA", "CN.b"], "_NA", "CN.b", ), ( ("b", "c"), - {"NA": (State(name="NA", splitter="a"), "b")}, + {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, ["_NA", ("CN.b", "CN.c")], "_NA", ("CN.b", "CN.c"), @@ -2119,8 +2245,8 @@ def test_state_connect_combine_prevst_6(): ( None, { - "NA": (State(name="NA", splitter="a"), "a"), - "NB": (State(name="NB", splitter="a"), "b"), + "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), + "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), }, ["_NA", "_NB"], ["_NA", "_NB"], @@ -2129,8 +2255,8 @@ def test_state_connect_combine_prevst_6(): ( "b", { - "NA": (State(name="NA", splitter="a"), "a"), - "NB": (State(name="NB", splitter="a"), "b"), + "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), + "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), }, [["_NA", "_NB"], "CN.b"], ["_NA", "_NB"], @@ -2139,8 +2265,8 @@ 
def test_state_connect_combine_prevst_6(): ( ["_NA", "b"], { - "NA": (State(name="NA", splitter="a"), "a"), - "NB": (State(name="NB", splitter="a"), "b"), + "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), + "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), }, [["_NB", "_NA"], "CN.b"], ["_NB", "_NA"], @@ -2161,13 +2287,19 @@ def test_connect_splitters( @pytest.mark.parametrize( "splitter, other_states", [ - (("_NA", "b"), {"NA": (State(name="NA", splitter="a"), "b")}), - (["b", "_NA"], {"NA": (State(name="NA", splitter="a"), "b")}), + ( + ("_NA", "b"), + {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + ), + ( + ["b", "_NA"], + {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + ), ( ["_NB", ["_NA", "b"]], { - "NA": (State(name="NA", splitter="a"), "a"), - "NB": (State(name="NB", splitter="a"), "b"), + "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), + "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), }, ), ], @@ -2183,7 +2315,9 @@ def test_connect_splitters_exception_2(): st = State( name="CN", splitter="_NB", - other_states={"NA": (State(name="NA", splitter="a"), "b")}, + other_states={ + "NA": (State(definition=example_def, name="NA", splitter="a"), "b") + }, ) st.set_input_groups() assert "can't ask for splitter from NB" in str(excinfo.value) @@ -2194,6 +2328,9 @@ def test_connect_splitters_exception_3(): State( name="CN", splitter="_NB", - other_states=["NA", (State(name="NA", splitter="a"), "b")], + other_states=[ + "NA", + (State(definition=example_def, name="NA", splitter="a"), "b"), + ], ) assert "other states has to be a dictionary" == str(excinfo.value) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 896398d060..071cc9faa0 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -18,8 +18,8 @@ ) from ..specs import ( ShellDef, - File, ) +from fileformats.generic import File from pydra.utils.hash import hash_function diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index 74ec853a84..ccca2e70c5 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -6,8 +6,8 @@ import tempfile import pytest from pydra.design import python -from pydra.engine.specs import File -from pydra.engine.workflow.lazy import LazyOutField +from fileformats.generic import File +from pydra.engine.lazy import LazyOutField from ..typing import TypeParser, MultiInputObj from fileformats.application import Json, Yaml, Xml from .utils import ( From 2d373312aeab8067154a1c8e307e52ad0a2afa3a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 13:52:40 +1100 Subject: [PATCH 152/342] fixing broken unittests pt1 --- pydra/design/python.py | 2 +- pydra/design/tests/test_python.py | 74 +++++++++---------------------- 2 files changed, 21 insertions(+), 55 deletions(-) diff --git a/pydra/design/python.py b/pydra/design/python.py index 197e357238..3baf7c5982 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -80,7 +80,7 @@ class out(Out): outputs """ - position: int = attrs.field(default=None) + pass @dataclass_transform( diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index 47d4347da7..fd91f7ed9f 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -26,10 +26,10 @@ def func(a: int) -> float: python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, 
default=func), ] - assert outputs == [python.out(name="out", type=float)] + assert outputs == [python.out(name="out", type=float, order=0)] definition = SampleDef(a=1) - result = definition() - assert result.output.out == 2.0 + outputs = definition() + assert outputs.out == 2.0 with pytest.raises(TypeError): SampleDef(a=1.5) @@ -49,9 +49,9 @@ def func(a: int, k: float = 2.0) -> float: python.arg(name="function", type=ty.Callable, default=func), python.arg(name="k", type=float, default=2.0), ] - assert outputs == [python.out(name="out", type=float)] - assert SampleDef(a=1)().output.out == 2.0 - assert SampleDef(a=10, k=3.0)().output.out == 30.0 + assert outputs == [python.out(name="out", type=float, order=0)] + assert SampleDef(a=1)().out == 2.0 + assert SampleDef(a=10, k=3.0)().out == 30.0 def test_interface_wrap_function_overrides(): @@ -73,7 +73,7 @@ def func(a: int) -> float: python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [ - python.out(name="b", type=Decimal, help="the doubled output"), + python.out(name="b", type=Decimal, help="the doubled output", order=0), ] outputs = SampleDef.Outputs(b=Decimal(2.0)) assert isinstance(outputs.b, Decimal) @@ -97,7 +97,7 @@ def func(a: int) -> int: python.arg(name="a", type=float), python.arg(name="function", type=ty.Callable, default=func), ] - assert outputs == [python.out(name="b", type=float)] + assert outputs == [python.out(name="b", type=float, order=0)] intf = SampleDef(a=1) assert isinstance(intf.a, float) outputs = SampleDef.Outputs(b=2.0) @@ -111,7 +111,6 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: return a + b, a * b assert issubclass(SampleDef, PythonDef) - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -124,36 +123,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float), - python.out(name="d", type=float), - ] - assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" - SampleDef.Outputs(c=1.0, d=2.0) - - -def test_interface_with_function_implicit_outputs_from_return_stmt(): - @python.define - def SampleDef(a: int, b: float) -> tuple[float, float]: - """Sample function for testing""" - c = a + b - d = a * b - return c, d - - assert SampleDef.Task is PythonTask - inputs = sorted(list_fields(SampleDef), key=sort_key) - outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) - assert inputs == [ - python.arg(name="a", type=int), - python.arg(name="b", type=float), - python.arg( - name="function", - type=ty.Callable, - default=attrs.fields(SampleDef).function.default, - ), - ] - assert outputs == [ - python.out(name="c", type=float), - python.out(name="d", type=float), + python.out(name="c", type=float, order=0), + python.out(name="d", type=float, order=1), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" SampleDef.Outputs(c=1.0, d=2.0) @@ -171,7 +142,6 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -184,8 +154,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b"), - python.out(name="d", type=float, help="product of a and b"), + python.out(name="c", type=float, 
help="Sum of a and b", order=0), + python.out(name="d", type=float, help="product of a and b", order=1), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -206,7 +176,6 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -219,8 +188,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b"), - python.out(name="d", type=float, help="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b", order=0), + python.out(name="d", type=float, help="Product of a and b", order=1), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -249,7 +218,6 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: """ return a + b, a * b - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -262,8 +230,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b"), - python.out(name="d", type=float, help="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b", order=0), + python.out(name="d", type=float, help="Product of a and b", order=1), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -297,7 +265,6 @@ def function(a, b): return a + b, a * b assert issubclass(SampleDef, PythonDef) - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -310,8 +277,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b"), - python.out(name="d", type=float, help="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b", order=0), + python.out(name="d", type=float, help="Product of a and b", order=1), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1) @@ -368,7 +335,6 @@ class Outputs: def function(a, b): return a + b, a * b - assert SampleDef.Task is PythonTask inputs = sorted(list_fields(SampleDef), key=sort_key) outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ @@ -381,8 +347,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b"), - python.out(name="d", type=float, help="Product of a and b"), + python.out(name="c", type=float, help="Sum of a and b", order=0), + python.out(name="d", type=float, help="Product of a and b", order=1), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1, b=2.0) From 4da2eefaff7dc9170c6b4df58db783ad56a1b086 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 19:21:33 +1100 Subject: [PATCH 153/342] debugging unittests --- new-docs/source/tutorial/5-shell.ipynb | 10 +- new-docs/tst.py | 4 +- pydra/design/base.py | 23 +-- pydra/design/python.py | 7 +- pydra/design/shell.py | 31 ++-- pydra/design/tests/test_shell.py | 192 +++++++++++---------- pydra/engine/helpers.py | 4 +- pydra/engine/specs.py | 36 ++-- pydra/engine/tests/test_dockertask.py | 30 ++-- pydra/engine/tests/test_environments.py | 4 +- pydra/engine/tests/test_nipype1_convert.py | 2 +- 
pydra/engine/tests/test_shelltask.py | 10 +- pydra/engine/tests/test_specs.py | 4 +- pydra/engine/tests/test_submitter.py | 8 +- pydra/engine/tests/test_task.py | 70 ++++---- pydra/engine/tests/test_workflow.py | 20 +-- pydra/utils/tests/test_typing.py | 8 +- 17 files changed, 251 insertions(+), 212 deletions(-) diff --git a/new-docs/source/tutorial/5-shell.ipynb b/new-docs/source/tutorial/5-shell.ipynb index 3b90c6488d..0cb40bc080 100644 --- a/new-docs/source/tutorial/5-shell.ipynb +++ b/new-docs/source/tutorial/5-shell.ipynb @@ -67,11 +67,11 @@ "print(f\"Command-line to be run: {cp.cmdline}\")\n", "\n", "# Run the shell-comand task\n", - "result = cp()\n", + "outputs = cp()\n", "\n", "print(\n", - " f\"Contents of copied file ('{result.output.destination}'): \"\n", - " f\"'{Path(result.output.destination).read_text()}'\"\n", + " f\"Contents of copied file ('{outputs.destination}'): \"\n", + " f\"'{Path(outputs.destination).read_text()}'\"\n", ")" ] }, @@ -335,10 +335,10 @@ "cp_with_size = CpWithSize(in_file=File.sample())\n", "\n", "# Run the command\n", - "result = cp_with_size()\n", + "outputs = cp_with_size()\n", "\n", "\n", - "print(f\"Size of the output file is: {result.output.out_file_size}\")" + "print(f\"Size of the output file is: {outputs.out_file_size}\")" ] }, { diff --git a/new-docs/tst.py b/new-docs/tst.py index d3589ba074..461b89d79a 100644 --- a/new-docs/tst.py +++ b/new-docs/tst.py @@ -8,7 +8,7 @@ load_json = LoadJson(file=json_file) # Run the task -result = load_json(plugin="serial") +outputs = load_json(plugin="serial") # Print the output interface of the of the task (LoadJson.Outputs) -print(result.outputs) +print(outputs) diff --git a/pydra/design/base.py b/pydra/design/base.py index f5dd8b7507..ab7b376f81 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -54,7 +54,7 @@ def __bool__(self): def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: """Ensure the default value has been coerced into the correct type""" - if value is EMPTY: + if value is EMPTY or isinstance(value, attrs.Factory): return value return TypeParser[self_.type](self_.type, label=self_.name)(value) @@ -197,6 +197,10 @@ def requirements_satisfied(self, inputs: "TaskDef") -> bool: """Check if all the requirements are satisfied by the inputs""" return any(req.satisfied(inputs) for req in self.requires) + @property + def mandatory(self): + return self.default is EMPTY + @attrs.define(kw_only=True) class Arg(Field): @@ -240,7 +244,7 @@ class Arg(Field): readonly: bool = False -@attrs.define(kw_only=True, slots=False) +@attrs.define(kw_only=True) class Out(Field): """Base class for output fields of task definitions @@ -265,7 +269,7 @@ class Out(Field): The order of the output in the output list, allows for tuple unpacking of outputs """ - order: int = attrs.field(default=None) + pass def extract_fields_from_class( @@ -394,10 +398,6 @@ def make_task_def( spec_type._check_arg_refs(inputs, outputs) - # Set positions for outputs to allow for tuple unpacking - for i, output in enumerate(outputs.values()): - output.order = i - if name is None and klass is not None: name = klass.__name__ if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: @@ -405,11 +405,11 @@ def make_task_def( f"{reserved_names} are reserved and cannot be used for {spec_type} field names" ) outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) + if issubclass(klass, TaskDef) and not issubclass(klass, spec_type): + raise ValueError(f"Cannot change type of definition 
{klass} to {spec_type}") if klass is None or not issubclass(klass, spec_type): if name is None: raise ValueError("name must be provided if klass is not") - if klass is not None and issubclass(klass, TaskDef): - raise ValueError(f"Cannot change type of definition {klass} to {spec_type}") bases = tuple(bases) # Ensure that TaskDef is a base class if not any(issubclass(b, spec_type) for b in bases): @@ -518,16 +518,17 @@ def make_outputs_spec( field.name = name field.type = base.__annotations__.get(name, ty.Any) outputs.update(base_outputs) + assert all(o.name == n for n, o in outputs.items()) outputs_klass = type( spec_name + "Outputs", tuple(outputs_bases), { - o.name: attrs.field( + n: attrs.field( converter=make_converter(o, f"{spec_name}.Outputs"), metadata={PYDRA_ATTR_METADATA: o}, **_get_default(o), ) - for o in outputs.values() + for n, o in outputs.items() }, ) outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) diff --git a/pydra/design/python.py b/pydra/design/python.py index 3baf7c5982..8036e5bc14 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -80,7 +80,7 @@ class out(Out): outputs """ - pass + order: int = attrs.field(default=None) @dataclass_transform( @@ -161,6 +161,11 @@ def make(wrapped: ty.Callable | type) -> PythonDef: name="function", type=ty.Callable, default=function ) + # Set positions for outputs to allow for tuple unpacking + output: out + for i, output in enumerate(parsed_outputs.values()): + output.order = i + interface = make_task_def( PythonDef, PythonOutputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 3a51b9f5d3..cafeb0b291 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -105,11 +105,12 @@ def _validate_sep(self, attribute, value): value is not None and self.type is not ty.Any and ty.get_origin(self.type) is not MultiInputObj - and not issubclass(self.type, ty.Iterable) ): - raise ValueError( - f"sep ({value!r}) can only be provided when type is iterable" - ) + tp = ty.get_origin(self.type) or self.type + if not issubclass(tp, ty.Iterable): + raise ValueError( + f"sep ({value!r}) can only be provided when type is iterable" + ) @attrs.define(kw_only=True) @@ -353,6 +354,12 @@ def make( if class_name[0].isdigit(): class_name = f"_{class_name}" + # Add in fields from base classes + parsed_inputs.update({n: getattr(ShellDef, n) for n in ShellDef.BASE_NAMES}) + parsed_outputs.update( + {n: getattr(ShellOutputs, n) for n in ShellOutputs.BASE_NAMES} + ) + # Update the inputs (overriding inputs from base classes) with the executable # and the output argument fields parsed_inputs.update( @@ -371,10 +378,12 @@ def make( # Set positions for the remaining inputs that don't have an explicit position position_stack = remaining_positions(list(parsed_inputs.values())) for inpt in parsed_inputs.values(): + if inpt.name == "additional_args": + continue if inpt.position is None: inpt.position = position_stack.pop(0) - interface = make_task_def( + defn = make_task_def( ShellDef, ShellOutputs, parsed_inputs, @@ -384,7 +393,7 @@ def make( bases=bases, outputs_bases=outputs_bases, ) - return interface + return defn # If a name is provided (and hence not being used as a decorator), check to see if # we are extending from a class that already defines an executable @@ -479,17 +488,19 @@ def parse_command_line_template( outputs = {} parts = template.split() executable = [] - for i, part in enumerate(parts, start=1): + start_args_index = 0 + for part in parts: if part.startswith("<") or 
part.startswith("-"): break executable.append(part) + start_args_index += 1 if not executable: raise ValueError(f"Found no executable in command line template: {template}") if len(executable) == 1: executable = executable[0] - if i == len(parts): + args_str = " ".join(parts[start_args_index:]) + if not args_str: return executable, inputs, outputs - args_str = " ".join(parts[i - 1 :]) tokens = re.split(r"\s+", args_str.strip()) arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" @@ -662,7 +673,7 @@ def remaining_positions( # Check for multiple positions positions = defaultdict(list) for arg in args: - if arg.name == "arguments": + if arg.name == "additional_args": continue if arg.position is not None: if arg.position >= 0: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index bcbab495aa..9a04eb2b6b 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -35,11 +35,12 @@ def test_interface_template(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg(name="in_path", type=FsObject, argpos=1), + shell.arg(name="in_path", type=FsObject, position=1), output, + ShellDef.additional_args, ] assert sorted_fields(Cp.Outputs) == [ output, @@ -82,11 +83,12 @@ def test_interface_template_w_types_and_path_template_ext(): validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg(name="in_image", type=image.Png, argpos=1), + shell.arg(name="in_image", type=image.Png, position=1), output, + ShellDef.additional_args, ] assert sorted_fields(TrimPng.Outputs) == [ output, @@ -122,10 +124,13 @@ def test_interface_template_w_modify(): validator=attrs.validators.min_len(1), default="trim-png", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg(name="image", type=image.Png, argpos=1, copy_mode=File.CopyMode.copy), + shell.arg( + name="image", type=image.Png, position=1, copy_mode=File.CopyMode.copy + ), + ShellDef.additional_args, ] assert sorted_fields(TrimPng.Outputs) == [ shell.out( @@ -178,35 +183,36 @@ def test_interface_template_more_complex(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=False, argpos=3), + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), shell.arg( name="text_arg", argstr="--text-arg", type=str | None, default=None, - argpos=4, + position=4, ), shell.arg( name="int_arg", argstr="--int-arg", type=int | None, default=None, - argpos=5, + position=5, ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str] | None, default=None, - argpos=6, + position=6, ), + ShellDef.additional_args, ] assert sorted_fields(Cp.Outputs) == [ output, @@ -268,45 +274,41 @@ def test_interface_template_with_overrides_and_optionals(): position=-1, ), ] - assert ( - sorted_fields(Cp) - == [ - shell.arg( - name="executable", - validator=attrs.validators.min_len(1), - default="cp", - type=str | ty.Sequence[str], - argpos=0, - help=shell.EXECUTABLE_HELP_STRING, - ), - shell.arg( - 
name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " - ), - shell.arg( - name="recursive", - argstr="-R", - type=bool, - default=False, - help=RECURSIVE_HELP, - argpos=2, - ), - shell.arg(name="text_arg", argstr="--text-arg", type=str, argpos=3), - shell.arg( - name="int_arg", - argstr="--int-arg", - type=int | None, - default=None, - argpos=4, - ), - shell.arg( - name="tuple_arg", - argstr="--tuple-arg", - type=tuple[int, str], - argpos=5, - ), - ] - + outargs - ) + assert sorted_fields(Cp) == [ + shell.arg( + name="executable", + validator=attrs.validators.min_len(1), + default="cp", + type=str | ty.Sequence[str], + position=0, + help=shell.EXECUTABLE_HELP_STRING, + ), + shell.arg( + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + ), + shell.arg( + name="recursive", + argstr="-R", + type=bool, + default=False, + help=RECURSIVE_HELP, + position=2, + ), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=3), + shell.arg( + name="int_arg", + argstr="--int-arg", + type=int | None, + default=None, + position=4, + ), + shell.arg( + name="tuple_arg", + argstr="--tuple-arg", + type=tuple[int, str], + position=5, + ), + ] + outargs + [ShellDef.additional_args] assert sorted_fields(Cp.Outputs) == outargs + [ shell.out( name="return_code", @@ -351,25 +353,26 @@ def test_interface_template_with_defaults(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=True, argpos=3), + shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), shell.arg( - name="text_arg", argstr="--text-arg", type=str, argpos=4, default="foo" + name="text_arg", argstr="--text-arg", type=str, position=4, default="foo" ), - shell.arg(name="int_arg", argstr="--int-arg", type=int, argpos=5, default=99), + shell.arg(name="int_arg", argstr="--int-arg", type=int, position=5, default=99), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], default=(1, "bar"), - argpos=6, + position=6, ), + ShellDef.additional_args, ] assert sorted_fields(Cp.Outputs) == [ output, @@ -419,27 +422,28 @@ def test_interface_template_with_type_overrides(): validator=attrs.validators.min_len(1), default="cp", type=str | ty.Sequence[str], - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], argpos=1, sep=" " + name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " ), output, - shell.arg(name="recursive", argstr="-R", type=bool, default=False, argpos=3), - shell.arg(name="text_arg", argstr="--text-arg", type=str, argpos=4), + shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), + shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), shell.arg( name="int_arg", argstr="--int-arg", type=int | None, - argpos=5, + position=5, ), shell.arg( name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], - argpos=6, + position=6, ), + ShellDef.additional_args, ] assert sorted_fields(Cp.Outputs) == [ output, @@ -472,7 +476,7 @@ class Ls(ShellDef["Ls.Outputs"]): directory: Directory = shell.arg( help="the directory to list the contents of", argstr="", - argpos=-1, + position=-1, ) hidden: bool = shell.arg( 
help=("display hidden FS objects"), @@ -523,15 +527,17 @@ class Outputs(ShellOutputs): type=Directory, help="the directory to list the contents of", argstr="", - argpos=-1, + position=-1, ), "hidden": shell.arg( type=bool, help="display hidden FS objects", + default=False, argstr="-a", ), "long_format": { # Mix it up with a full dictionary based definition "type": bool, + "default": False, "help": ( "display properties of FS object, such as permissions, size and " "timestamps " @@ -541,6 +547,7 @@ class Outputs(ShellOutputs): "human_readable": shell.arg( type=bool, help="display file sizes in human readable form", + default=False, argstr="-h", requires=["long_format"], ), @@ -555,6 +562,7 @@ class Outputs(ShellOutputs): "date_format_str": shell.arg( type=str | None, help="format string for ", + default=None, argstr="-D", requires=["long_format"], xor=["complete_date"], @@ -579,6 +587,7 @@ class Outputs(ShellOutputs): def test_shell_fields(Ls): assert sorted([a.name for a in sorted_fields(Ls)]) == sorted( [ + "additional_args", "executable", "directory", "hidden", @@ -626,9 +635,9 @@ def test_shell_run(Ls, tmp_path): # Drop Long format flag to make output simpler ls = Ls(directory=tmp_path) - result = ls() + outputs = ls() - assert sorted(result.output.entries) == ["a", "b", "c"] + assert sorted(outputs.entries) == ["a", "b", "c"] @pytest.fixture(params=["static", "dynamic"]) @@ -647,7 +656,7 @@ class A: executable = "cp" - x: File = shell.arg(argstr="", argpos=1) + x: File = shell.arg(argstr="", position=1) class Outputs: """The outputs of the example shell interface @@ -667,7 +676,7 @@ class Outputs: type=File, help="an input file", argstr="", - argpos=1, + position=1, ), }, outputs={ @@ -697,7 +706,7 @@ class A: executable = "cp" - x: File = shell.arg(help="an input file", argstr="", argpos=1) + x: File = shell.arg(help="an input file", argstr="", position=1) class Outputs: y: File = shell.outarg( @@ -707,7 +716,11 @@ class Outputs: position=-1, ) - assert sorted([a.name for a in attrs.fields(A)]) == ["executable", "x", "y"] + assert sorted([a.name for a in attrs.fields(A) if not a.name.startswith("_")]) == [ + "executable", + "x", + "y", + ] assert sorted(a.name for a in attrs.fields(A.Outputs)) == [ "return_code", "stderr", @@ -729,7 +742,7 @@ class Outputs: default="cp", type=str | ty.Sequence[str], argstr="", - argpos=0, + position=0, help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( @@ -737,9 +750,10 @@ class Outputs: type=File, help="an input file", argstr="", - argpos=1, + position=1, ), output, + ShellDef.additional_args, ] assert sorted_fields(A.Outputs) == [ output, @@ -770,7 +784,7 @@ def test_shell_output_field_name_dynamic(): type=File, help="an input file", argstr="", - argpos=1, + position=1, ), }, outputs={ @@ -794,7 +808,7 @@ def get_file_size(y: Path): def test_shell_bases_dynamic(A, tmp_path): B = shell.define( name="B", - inputs={"y": shell.arg(type=File, help="output file", argstr="", argpos=-1)}, + inputs={"y": shell.arg(type=File, help="output file", argstr="", position=-1)}, outputs={ "out_file_size": { "type": int, @@ -815,8 +829,8 @@ def test_shell_bases_dynamic(A, tmp_path): assert b.x == File(xpath) assert b.y == File(ypath) - # result = b() - # assert result.output.y == str(ypath) + # outputs = b() + # assert outputs.y == str(ypath) def test_shell_bases_static(A, tmp_path): @@ -848,8 +862,8 @@ class Outputs: # gets coerced to a text.Plain object assert b.y == text.Plain(ypath) - # result = b() - # assert result.output.y == str(ypath) + # outputs = b() + # 
assert outputs.y == str(ypath) def test_shell_inputs_outputs_bases_dynamic(tmp_path): @@ -861,7 +875,7 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): type=Directory, help="input directory", argstr="", - argpos=-1, + position=-1, ) }, outputs={ @@ -892,9 +906,9 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): assert b.hidden # File.sample(tmp_path, stem=".hidden-file") - # result = b() + # outputs = b() # assert result.runner.cmdline == f"ls -a {tmp_path}" - # assert result.output.entries == [".", "..", ".hidden-file"] + # assert outputs.entries == [".", "..", ".hidden-file"] def test_shell_inputs_outputs_bases_static(tmp_path): @@ -902,7 +916,7 @@ def test_shell_inputs_outputs_bases_static(tmp_path): class A: executable = "ls" - directory: Directory = shell.arg(help="input directory", argstr="", argpos=-1) + directory: Directory = shell.arg(help="input directory", argstr="", position=-1) class Outputs: entries: list = shell.out( @@ -925,8 +939,8 @@ class B(A): assert b.directory == Directory(tmp_path) assert b.hidden - # result = b() - # assert result.output.entries == [".", "..", ".hidden"] + # outputs = b() + # assert outputs.entries == [".", "..", ".hidden"] def test_shell_missing_executable_static(): @@ -935,7 +949,7 @@ def test_shell_missing_executable_static(): @shell.define class A: directory: Directory = shell.arg( - help="input directory", argstr="", argpos=-1 + help="input directory", argstr="", position=-1 ) class Outputs: @@ -957,7 +971,7 @@ def test_shell_missing_executable_dynamic(): type=Directory, help="input directory", argstr="", - argpos=-1, + position=-1, ), }, outputs={ @@ -976,9 +990,11 @@ def list_entries(stdout): def sorted_fields(interface): fields = list_fields(interface) - length = len(fields) + length = len(fields) - 1 def pos_key(out: shell.out) -> int: + if out.name == "additional_args": + return (length + 1, out.name) try: pos = out.position except AttributeError: diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 4acfcc342d..df27ebeecd 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -164,7 +164,7 @@ def save( if result: if task_path.name.startswith("Workflow") and result.outputs is not None: # copy files to the workflow directory - result = copyfile_workflow(wf_path=task_path, result=result) + result.outputs = copyfile_workflow(wf_path=task_path, result=result) with (task_path / f"{name_prefix}_result.pklz").open("wb") as fp: cp.dump(result, fp) if task: @@ -172,7 +172,7 @@ def save( cp.dump(task, fp) -def copyfile_workflow(wf_path: os.PathLike, result): +def copyfile_workflow(wf_path: os.PathLike, result: "Result") -> "Result": """if file in the wf results, the file will be copied to the workflow directory""" from .helpers_file import copy_nested_files diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 07e7c1a6ff..0ac70438d4 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -109,12 +109,6 @@ def __getitem__(self, name_or_index: str | int) -> ty.Any: f"{self} doesn't have an attribute {name_or_index}" ) from None - def __iter__(self) -> ty.Generator[ty.Any, None, None]: - """Iterate through all the values in the definition, allows for tuple unpacking""" - fields = sorted(attrs_fields(self), key=attrgetter("order")) - for field in fields: - yield getattr(self, field.name) - OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) @@ -427,7 +421,7 @@ def _check_rules(self): field: Arg errors = [] for field in list_fields(self): - value = getattr(self, field.name) + value 
= self[field.name] if is_lazy(value): continue @@ -437,7 +431,7 @@ def _check_rules(self): # Collect alternative fields associated with this field. if field.xor: - mutually_exclusive = {name: getattr(self, name) for name in field.xor} + mutually_exclusive = {name: self[name] for name in field.xor} are_set = [ f"{n}={v!r}" for n, v in mutually_exclusive.items() if v is not None ] @@ -446,7 +440,7 @@ def _check_rules(self): f"Mutually exclusive fields {field.xor} are set together: " + ", ".join(are_set) ) - elif not are_set: + elif field.mandatory and not are_set: errors.append( f"At least one of the mutually exclusive fields {field.xor} " f"should be set" @@ -588,6 +582,12 @@ class RuntimeSpec: class PythonOutputs(TaskOutputs): + def __iter__(self) -> ty.Generator[ty.Any, None, None]: + """Iterate through all the values in the definition, allows for tuple unpacking""" + fields = sorted(attrs_fields(self), key=attrgetter("order")) + for field in fields: + yield getattr(self, field.name) + @classmethod def _from_task(cls, task: "Task[PythonDef]") -> Self: """Collect the outputs of a task from a combination of the provided inputs, @@ -725,9 +725,11 @@ def construct(self) -> "Workflow": class ShellOutputs(TaskOutputs): """Output definition of a generic shell process.""" - return_code: int = shell.out(help=RETURN_CODE_HELP) - stdout: str = shell.out(help=STDOUT_HELP) - stderr: str = shell.out(help=STDERR_HELP) + BASE_NAMES = ["return_code", "stdout", "stderr"] + + return_code: int = shell.out(name="return_code", type=int, help=RETURN_CODE_HELP) + stdout: str = shell.out(name="stdout", type=str, help=STDOUT_HELP) + stderr: str = shell.out(name="stderr", type=str, help=STDERR_HELP) @classmethod def _from_task(cls, task: "Task[ShellDef]") -> Self: @@ -882,8 +884,12 @@ def _resolve_value( class ShellDef(TaskDef[ShellOutputsType]): - arguments: ty.List[str] = shell.arg( + BASE_NAMES = ["additional_args"] + + additional_args: list[str] = shell.arg( + name="additional_args", default=attrs.Factory(list), + type=list[str], sep=" ", help="Additional free-form arguments to append to the end of the command.", ) @@ -937,7 +943,7 @@ def _command_args( continue if name == "executable": pos_args.append(self._command_shelltask_executable(field, value)) - elif name == "arguments": + elif name == "additional_args": continue elif name == "args": pos_val = self._command_shelltask_args(field, value) @@ -967,7 +973,7 @@ def _command_args( cmd_args = position_sort(pos_args) # pos_args values are each a list of arguments, so concatenate lists after sorting command_args = sum(cmd_args, []) - command_args += self.arguments + command_args += inputs["additional_args"] return command_args def _command_shelltask_executable( diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 94cb71b49e..a80089f5a5 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -140,7 +140,7 @@ def test_docker_inputspec_1(tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) @@ -173,7 +173,7 @@ def test_docker_inputspec_1a(tmp_path): name="file", type=File, default=filename, - argpos=1, + position=1, argstr="", help="input file", ) @@ -206,7 +206,7 @@ def test_docker_inputspec_2(plugin, tmp_path): shell.arg( name="file1", type=File, - argpos=1, + position=1, argstr="", help="input file 1", ), @@ -214,7 +214,7 @@ def test_docker_inputspec_2(plugin, tmp_path): name="file2", type=File, default=filename_2, - argpos=2, + 
position=2, argstr="", help="input file 2", ), @@ -250,14 +250,14 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): name="file1", type=File, default=filename_1, - argpos=1, + position=1, argstr="", help="input file 1", ), shell.arg( name="file2", type=File, - argpos=2, + position=2, argstr="", help="input file 2", ), @@ -295,14 +295,14 @@ def test_docker_inputspec_2a(plugin, tmp_path): name="file1", type=File, default=filename_1, - argpos=1, + position=1, argstr="", help="input file 1", ), shell.arg( name="file2", type=File, - argpos=2, + position=2, argstr="", help="input file 2", ), @@ -332,7 +332,7 @@ def test_docker_inputspec_3(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", container_path=True, @@ -368,7 +368,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): shell.arg( name="orig_file", type=File, - argpos=1, + position=1, argstr="", help="orig file", copyfile="copy", @@ -418,7 +418,7 @@ def test_docker_inputspec_state_1(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) @@ -454,7 +454,7 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) @@ -483,7 +483,7 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) @@ -525,7 +525,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) @@ -569,7 +569,7 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - argpos=1, + position=1, argstr="", help="input file", ) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 2b2d036c6c..6114bf6c91 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -175,7 +175,7 @@ def create_shelly_inputfile(tempdir, filename, name, executable): shell.arg( name="file", type=File, - argpos=1, + position=1, help="files", argstr="", ) @@ -350,7 +350,7 @@ def create_shelly_outputfile(tempdir, filename, name, executable="cp"): shell.arg( name="file_orig", type=File, - argpos=2, + position=2, help="new file", argstr="", ), diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index 60739bd6e0..07af76e501 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -16,7 +16,7 @@ def find_txt(output_dir: Path) -> File: interf_outputs = [shell.out(name="test_out", type=File, callable=find_txt)] -Interf_1 = shell.define(inputs=interf_inputs, outputs=interf_outputs) +Interf_1 = shell.define("testing", inputs=interf_inputs, outputs=interf_outputs) Interf_2 = shell.define("testing command", inputs=interf_inputs, outputs=interf_outputs) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 2c458a494d..117489b3fd 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1601,7 +1601,7 @@ def test_shell_cmd_inputspec_11(tmp_path): sub(wf) result = wf.result() - for out_file in result.output.out: + for out_file in outputs.out: assert out_file.fspath.name == "test1" or out_file.fspath.name == "test2" @@ -4383,8 +4383,8 @@ def test_shell_cmd_optional_output_file1(tmp_path): ) file1 = tmp_path / "file1.txt" 
file1.write_text("foo") - result = my_cp(input=file1, unused=False) - assert result.output.output.fspath.read_text() == "foo" + outputs = my_cp(input=file1, unused=False) + assert outputs.output.fspath.read_text() == "foo" def test_shell_cmd_optional_output_file2(tmp_path): @@ -4421,8 +4421,8 @@ def test_shell_cmd_optional_output_file2(tmp_path): ) file1 = tmp_path / "file1.txt" file1.write_text("foo") - result = my_cp(input=file1, output=True) - assert result.output.output.fspath.read_text() == "foo" + outputs = my_cp(input=file1, output=True) + assert outputs.output.fspath.read_text() == "foo" file2 = tmp_path / "file2.txt" file2.write_text("bar") diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 544b7570d2..757c94d728 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -401,5 +401,5 @@ def identity(x: int) -> int: outer.add(inner.split(x=outer.lzin.x)) outer.set_output(("out", outer.inner.lzout.out)) - result = outer(x=[1, 2, 3]) - assert result.output.out == StateArray([1, 2, 3]) + outputs = outer(x=[1, 2, 3]) + assert outputs.out == StateArray([1, 2, 3]) diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index ef0c898000..9150ad0087 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -704,16 +704,16 @@ def test_byo_worker(): with Submitter(worker=BYOAddVarWorker, add_var=10) as sub: assert sub.plugin == "byo_add_env_var" - result = task1(submitter=sub) + result = sub(task1) - assert result.output.out == 11 + assert outputs.out == 11 task2 = add_env_var_task(x=2) with Submitter(worker="serial") as sub: - result = task2(submitter=sub) + result = sub(task2) - assert result.output.out == 2 + assert outputs.out == 2 def test_bad_builtin_worker(): diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 071cc9faa0..339a7bc1e7 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -88,10 +88,10 @@ def testfunc( # assert funky.inputs.hash == '17772c3aec9540a8dd3e187eecd2301a09c9a25c6e371ddd86e31e3a1ecfeefa' assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - result = funky() + outputs = funky() assert hasattr(result, "output") - assert hasattr(result.output, "out_out") - assert result.output.out_out == 1.1 + assert hasattr(outputs, "out_out") + assert outputs.out_out == 1.1 assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") funky.result() # should not recompute @@ -100,7 +100,7 @@ def testfunc( assert funky.result() is None funky() result = funky.result() - assert result.output.out_out == 2.1 + assert outputs.out_out == 2.1 help = funky.help(returnhelp=True) assert help == [ @@ -123,16 +123,16 @@ def testfunc(a: int, b: int): return dict(sum=a + b, diff=a - b) task = testfunc(a=2, b=3) - result = task() + outputs = task() # Part of the annotation and returned, should be exposed to output. - assert result.output.sum == 5 + assert outputs.sum == 5 # Part of the annotation but not returned, should be coalesced to None - assert result.output.mul is None + assert outputs.mul is None # Not part of the annotation, should be discarded. 
- assert not hasattr(result.output, "diff") + assert not hasattr(outputs, "diff") def test_annotated_func_multreturn(): @@ -154,13 +154,13 @@ def testfunc( assert set(funky.output_names) == {"fractional", "integer"} assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - result = funky() + outputs = funky() assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") assert hasattr(result, "output") - assert hasattr(result.output, "fractional") - assert result.output.fractional == 0.5 - assert hasattr(result.output, "integer") - assert result.output.integer == 3 + assert hasattr(outputs, "fractional") + assert outputs.fractional == 0.5 + assert hasattr(outputs, "integer") + assert outputs.integer == 3 help = funky.help(returnhelp=True) assert help == [ @@ -453,10 +453,10 @@ def testfunc(a, b) -> int: assert set(funky.output_names) == {"out"} assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - result = funky() + outputs = funky() assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == 30 + assert hasattr(outputs, "out") + assert outputs.out == 30 assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") @@ -465,7 +465,7 @@ def testfunc(a, b) -> int: assert funky.result() is None funky() result = funky.result() - assert result.output.out == 31 + assert outputs.out == 31 help = funky.help(returnhelp=True) assert help == [ @@ -494,10 +494,10 @@ def testfunc(a, b) -> (int, int): assert set(funky.output_names) == {"out1", "out2"} assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum - result = funky() + outputs = funky() assert hasattr(result, "output") - assert hasattr(result.output, "out1") - assert result.output.out1 == 11 + assert hasattr(outputs, "out1") + assert outputs.out1 == 11 assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") @@ -506,7 +506,7 @@ def testfunc(a, b) -> (int, int): assert funky.result() is None funky() result = funky.result() - assert result.output.out1 == 12 + assert outputs.out1 == 12 help = funky.help(returnhelp=True) assert help == [ @@ -533,8 +533,8 @@ def no_annots(c, d): result = natask._run() assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == 20.2 + assert hasattr(outputs, "out") + assert outputs.out == 20.2 def test_notannotated_func_returnlist(): @@ -544,8 +544,8 @@ def no_annots(c, d): natask = no_annots(c=17, d=3.2) result = natask._run() - assert hasattr(result.output, "out") - assert result.output.out == [17, 3.2] + assert hasattr(outputs, "out") + assert outputs.out == [17, 3.2] def test_halfannotated_func_multrun_returnlist(): @@ -556,10 +556,10 @@ def no_annots(c, d) -> (list, float): natask = no_annots(c=17, d=3.2) result = natask._run() - assert hasattr(result.output, "out1") - assert hasattr(result.output, "out2") - assert result.output.out1 == [17, 3.2] - assert result.output.out2 == 20.2 + assert hasattr(outputs, "out1") + assert hasattr(outputs, "out2") + assert outputs.out1 == [17, 3.2] + assert outputs.out2 == 20.2 def test_notannotated_func_multreturn(): @@ -578,8 +578,8 @@ def no_annots(c, d): result = natask._run() assert hasattr(result, "output") - assert hasattr(result.output, "out") - assert result.output.out == (20.2, 13.8) + assert hasattr(outputs, "out") + assert outputs.out == (20.2, 13.8) def test_input_spec_func_1(): @@ -1427,10 +1427,10 @@ def test_taskhooks_3(tmpdir, capsys): foo = funaddtwo(name="foo", a=1, 
cache_dir=tmpdir) def myhook_postrun_task(task, result, *args): - print(f"postrun task hook, the result is {result.output.out}") + print(f"postrun task hook, the result is {outputs.out}") def myhook_postrun(task, result, *args): - print(f"postrun hook, the result is {result.output.out}") + print(f"postrun hook, the result is {outputs.out}") foo.hooks.post_run = myhook_postrun foo.hooks.post_run_task = myhook_postrun_task @@ -1572,8 +1572,8 @@ def test_object_input(): def testfunc(a: A): return a.x - result = testfunc(a=A(x=7))() - assert result.output.out == 7 + outputs = testfunc(a=A(x=7))() + assert outputs.out == 7 def test_argstr_formatting(): diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 8c226d0f98..09ed541c47 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -112,9 +112,9 @@ def test_wf_dict_input_and_output_spec(): exc_info, "Could not coerce object, 'bad-value', to any of the union types" ) - result = wf() - assert result.output.a == "any-string" - assert result.output.b == {"foo": 1, "bar": False} + outputs = wf() + assert outputs.a == "any-string" + assert outputs.b == {"foo": 1, "bar": False} def test_wf_name_conflict1(): @@ -3931,11 +3931,11 @@ def test_workflow_combine1(tmpdir): } ) wf1.cache_dir = tmpdir - result = wf1() + outputs = wf1() - assert result.output.out_pow == [1, 1, 4, 8] - assert result.output.out_iden1 == [[1, 4], [1, 8]] - assert result.output.out_iden2 == [[1, 4], [1, 8]] + assert outputs.out_pow == [1, 1, 4, 8] + assert outputs.out_iden1 == [[1, 4], [1, 8]] + assert outputs.out_iden2 == [[1, 4], [1, 8]] def test_workflow_combine2(tmpdir): @@ -3946,10 +3946,10 @@ def test_workflow_combine2(tmpdir): wf1.add(identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) wf1.set_output({"out_pow": wf1.power.lzout.out, "out_iden": wf1.identity.lzout.out}) wf1.cache_dir = tmpdir - result = wf1() + outputs = wf1() - assert result.output.out_pow == [[1, 4], [1, 8]] - assert result.output.out_iden == [[1, 4], [1, 8]] + assert outputs.out_pow == [[1, 4], [1, 8]] + assert outputs.out_iden == [[1, 4], [1, 8]] # testing lzout.all to collect all of the results and let PythonTask deal with it diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index ccca2e70c5..c4c0dd1208 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -750,9 +750,9 @@ def test_typing_implicit_cast_from_super(tmp_path, generic_task, specific_task): in_file = MyFormatX.sample() - result = wf(in_file=in_file, plugin="serial") + outputs = wf(in_file=in_file, plugin="serial") - out_file: MyFormatX = result.output.out_file + out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX assert out_file.parent != in_file.parent assert type(out_file.header) is MyHeader @@ -818,9 +818,9 @@ def test_typing_cast(tmp_path, specific_task, other_specific_task): in_file = MyFormatX.sample() - result = wf(in_file=in_file, plugin="serial") + outputs = wf(in_file=in_file, plugin="serial") - out_file: MyFormatX = result.output.out_file + out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX assert out_file.parent != in_file.parent assert type(out_file.header) is MyHeader From 0a32bfa6767cb29b7f83b08fd29fd61aa9946ec8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 19:46:36 +1100 Subject: [PATCH 154/342] more debugging of unittests --- pydra/design/base.py | 20 +++++++++++---- pydra/design/python.py | 2 +- 
pydra/design/shell.py | 2 +- pydra/design/tests/test_python.py | 13 +++++----- pydra/design/tests/test_shell.py | 15 +++++------ pydra/design/tests/test_workflow.py | 39 ++++++----------------------- pydra/design/workflow.py | 2 +- 7 files changed, 39 insertions(+), 54 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index ab7b376f81..57828ec56a 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -273,6 +273,8 @@ class Out(Field): def extract_fields_from_class( + spec_type: type["TaskDef"], + outputs_type: type["TaskOutputs"], klass: type, arg_type: type[Arg], out_type: type[Out], @@ -341,6 +343,12 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: ) return fields_dict + if not issubclass(klass, spec_type): + raise ValueError( + f"The canonical form of {spec_type.__module__.split('.')[-1]} task definitions, " + f"{klass}, must inherit from {spec_type}" + ) + inputs = get_fields(klass, arg_type, auto_attribs, input_helps) try: @@ -349,6 +357,12 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: raise AttributeError( f"Nested Outputs class not found in {klass.__name__}" ) from None + if not issubclass(outputs_klass, outputs_type): + raise ValueError( + f"The canonical form of {spec_type.__module__.split('.')[-1]} task definitions, " + f"{klass}, must inherit from {spec_type}" + ) + output_helps, _ = parse_doc_string(outputs_klass.__doc__) outputs = get_fields(outputs_klass, out_type, auto_attribs, output_helps) @@ -394,8 +408,6 @@ def make_task_def( klass : type The class created using the attrs package """ - from pydra.engine.specs import TaskDef - spec_type._check_arg_refs(inputs, outputs) if name is None and klass is not None: @@ -405,9 +417,7 @@ def make_task_def( f"{reserved_names} are reserved and cannot be used for {spec_type} field names" ) outputs_klass = make_outputs_spec(out_type, outputs, outputs_bases, name) - if issubclass(klass, TaskDef) and not issubclass(klass, spec_type): - raise ValueError(f"Cannot change type of definition {klass} to {spec_type}") - if klass is None or not issubclass(klass, spec_type): + if klass is None: if name is None: raise ValueError("name must be provided if klass is not") bases = tuple(bases) diff --git a/pydra/design/python.py b/pydra/design/python.py index 8036e5bc14..7fcbb7af65 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -133,7 +133,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) parsed_inputs, parsed_outputs = extract_fields_from_class( - klass, arg, out, auto_attribs + PythonDef, PythonOutputs, klass, arg, out, auto_attribs ) else: if not isinstance(wrapped, ty.Callable): diff --git a/pydra/design/shell.py b/pydra/design/shell.py index cafeb0b291..f958b90869 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -320,7 +320,7 @@ def make( class_name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) parsed_inputs, parsed_outputs = extract_fields_from_class( - klass, arg, out, auto_attribs + ShellDef, ShellOutputs, klass, arg, out, auto_attribs ) else: if not isinstance(wrapped, str): diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index fd91f7ed9f..ba822b3d2c 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -4,9 +4,8 @@ import attrs import pytest from pydra.engine.helpers import list_fields -from pydra.engine.specs import PythonDef +from 
pydra.engine.specs import PythonDef, PythonOutputs from pydra.design import python -from pydra.engine.task import PythonTask sort_key = attrgetter("name") @@ -238,7 +237,7 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: def test_interface_with_class(): @python.define - class SampleDef: + class SampleDef(PythonDef["SampleDef.Outputs"]): """Sample class for testing Args: @@ -250,7 +249,7 @@ class SampleDef: a: int b: float = 2.0 - class Outputs: + class Outputs(PythonOutputs): """ Args: c: Sum of a and b @@ -300,7 +299,7 @@ class SampleDef(PythonDef["SampleDef.Outputs"]): a: int b: float - class Outputs: + class Outputs(PythonOutputs): """ Args: c: Sum of a and b @@ -319,13 +318,13 @@ def function(a, b): def test_interface_with_class_no_auto_attribs(): @python.define(auto_attribs=False) - class SampleDef: + class SampleDef(PythonDef["SampleDef.Outputs"]): a: int = python.arg(help="First input to be inputted") b: float = python.arg(help="Second input") x: int - class Outputs: + class Outputs(PythonOutputs): c: float = python.out(help="Sum of a and b") d: float = python.out(help="Product of a and b") diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 9a04eb2b6b..31a5943d87 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -645,7 +645,7 @@ def A(request): if request.param == "static": @shell.define - class A: + class A(ShellDef["A.Outputs"]): """An example shell interface described in a class Parameters @@ -658,7 +658,7 @@ class A: x: File = shell.arg(argstr="", position=1) - class Outputs: + class Outputs(ShellOutputs): """The outputs of the example shell interface Parameters @@ -701,14 +701,14 @@ def test_shell_output_path_template(A): def test_shell_output_field_name_static(): @shell.define - class A: + class A(ShellDef["A.Outputs"]): """Copy a file""" executable = "cp" x: File = shell.arg(help="an input file", argstr="", position=1) - class Outputs: + class Outputs(ShellOutputs): y: File = shell.outarg( help="the output file", path_template="{x}_out", @@ -717,6 +717,7 @@ class Outputs: ) assert sorted([a.name for a in attrs.fields(A) if not a.name.startswith("_")]) == [ + "additional_args", "executable", "x", "y", @@ -839,7 +840,7 @@ class B(A): y: text.Plain = shell.arg() # Override the output arg in A - class Outputs: + class Outputs(ShellOutputs): """ Args: out_file_size: size of the output directory @@ -913,12 +914,12 @@ def test_shell_inputs_outputs_bases_dynamic(tmp_path): def test_shell_inputs_outputs_bases_static(tmp_path): @shell.define - class A: + class A(ShellDef["A.Outputs"]): executable = "ls" directory: Directory = shell.arg(help="input directory", argstr="", position=-1) - class Outputs: + class Outputs(ShellOutputs): entries: list = shell.out( help="list of entries returned by ls command", callable=list_entries, diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index d77832c7bf..2e3a8fda68 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -7,6 +7,7 @@ from pydra.design import shell, python, workflow from pydra.engine.helpers import list_fields from pydra.engine.specs import WorkflowDef, WorkflowOutputs +from pydra.engine.core import Workflow from fileformats import video, image # NB: We use PascalCase for interfaces and workflow functions as it is translated into a class @@ -72,7 +73,7 @@ def MyTestWorkflow(a, b): def test_shell_workflow(): - @workflow.define + @workflow.define(outputs=["output_video"]) def 
MyTestShellWorkflow( input_video: video.Mp4, watermark: image.Png, @@ -98,7 +99,7 @@ def MyTestShellWorkflow( name="resize", ).out_video - return output_video # test implicit detection of output name + return output_video constructor = MyTestShellWorkflow().constructor assert constructor.__name__ == "MyTestShellWorkflow" @@ -330,7 +331,7 @@ def Sum(x: list[float]) -> float: @workflow.define def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: - mul = workflow.add(Mul()).split(x=a, y=b).combine("x") + mul = workflow.add(Mul().split(x=a, y=b).combine("x")) sum = workflow.add(Sum(x=mul.out)) return sum.out @@ -354,8 +355,8 @@ def Add(x: float, y: float) -> float: @workflow.define def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: - mul = workflow.add(Mul()).split(x=a, y=b) - add = workflow.add(Add(x=mul.out, y=c)).combine("Mul.x") + mul = workflow.add(Mul().split(x=a, y=b)) + add = workflow.add(Add(x=mul.out, y=c).combine("Mul.x")) sum = workflow.add(Sum(x=add.out)) return sum.out @@ -369,32 +370,6 @@ def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: ) -def test_workflow_split_after_access_fail(): - """It isn't possible to split/combine a node after one of its outputs has been type - checked as this changes the type of the outputs and renders the type checking - invalid - """ - - @python.define - def Add(x: float, y: float) -> float: - return x + y - - @python.define - def Mul(x: float, y: float) -> float: - return x * y - - @workflow.define - def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: - - add = workflow.add(Add()) - mul = workflow.add(Mul(x=add.out, y=2.0)) # << Add.out is accessed here - add.split(x=a, y=b).combine("x") - return mul.out - - with pytest.raises(RuntimeError, match="Outputs .* have already been accessed"): - Workflow.construct(MyTestWorkflow(a=[1, 2, 3], b=[1.0, 10.0, 100.0])) - - def test_nested_workflow(): """Simple test of a nested workflow""" @@ -434,7 +409,7 @@ def MyTestWorkflow(a: int, b: float, c: float) -> float: node=wf["NestedWorkflow"], field="out", type=float, type_checked=True ) assert list(wf.node_names) == ["Divide", "NestedWorkflow"] - nwf_spec = copy(wf["NestedWorkflow"]._spec) + nwf_spec = copy(wf["NestedWorkflow"]._definition) nwf_spec.a = 100.0 nwf = Workflow.construct(nwf_spec) nwf.inputs.a == 100.0 diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 6865ed542a..e206aa4300 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -140,7 +140,7 @@ def make(wrapped: ty.Callable | type) -> TaskDef: name = klass.__name__ check_explicit_fields_are_none(klass, inputs, outputs) parsed_inputs, parsed_outputs = extract_fields_from_class( - klass, arg, out, auto_attribs + WorkflowDef, WorkflowOutputs, klass, arg, out, auto_attribs ) else: if not inspect.isfunction(wrapped): From 4ff730369a18077ae86e3d14f0dea23eb61e023d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 20:26:39 +1100 Subject: [PATCH 155/342] fixed up setting of state in workflow constructors --- pydra/engine/node.py | 67 ++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index bfe484fb45..1ad45dd008 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -54,26 +54,7 @@ class Node(ty.Generic[OutputType]): ) # QUESTION: should this be included in the state? 
def __attrs_post_init__(self): - # Add node name to state's splitter, combiner and cont_dim loaded from the def - splitter = self._definition._splitter - combiner = self._definition._combiner - if splitter: - splitter = hlpst.add_name_splitter(splitter, self.name) - if combiner: - combiner = hlpst.add_name_combiner(combiner, self.name) - if self._definition._cont_dim: - self._cont_dim = {} - for key, val in self._definition._cont_dim.items(): - self._cont_dim[f"{self.name}.{key}"] = val - self._set_state(splitter=splitter, combiner=combiner) - if combiner: - if not_split := [ - c for c in combiner if not any(c in s for s in self.state.splitter_rpn) - ]: - raise ValueError( - f"Combiner fields {not_split} for Node {self.name!r} are not in the " - f"splitter fields {self.state.splitter_rpn}" - ) + self._set_state() class Inputs: """A class to wrap the inputs of a node and control access to them so lazy fields @@ -101,6 +82,7 @@ def __setattr__(self, name: str, value: ty.Any) -> None: f"cannot set {name!r} input to {value} because it changes the " f"state" ) + self._set_state() @property def inputs(self) -> Inputs: @@ -115,9 +97,6 @@ def state(self): """Initialise the state of the node just after it has been created (i.e. before it has been split or combined) based on the upstream connections """ - if self._state is not NOT_SET: - return self._state - self._set_state(other_states=self._get_upstream_states()) return self._state @property @@ -248,22 +227,20 @@ def _wrap_lzout_types_in_state_arrays(self) -> None: type_ = StateArray[type_] outpt_lf.type = type_ - def _set_state( - self, - splitter: list[str] | tuple[str, ...] | None = None, - combiner: list[str] | None = None, - other_states: dict[str, tuple["State", list[str]]] | None = None, - ) -> None: - if self._state not in (NOT_SET, None): - if splitter is None: - splitter = self._state.current_splitter - if combiner is None: - combiner = self._state.current_combiner - if other_states is None: - other_states = self._state.other_states - if not (splitter or combiner or other_states): - self._state = None - else: + def _set_state(self) -> None: + # Add node name to state's splitter, combiner and cont_dim loaded from the def + splitter = self._definition._splitter + combiner = self._definition._combiner + if splitter: + splitter = hlpst.add_name_splitter(splitter, self.name) + if combiner: + combiner = hlpst.add_name_combiner(combiner, self.name) + if self._definition._cont_dim: + self._cont_dim = {} + for key, val in self._definition._cont_dim.items(): + self._cont_dim[f"{self.name}.{key}"] = val + other_states = self._get_upstream_states() + if splitter or combiner or other_states: self._state = State( self.name, self._definition, @@ -271,6 +248,18 @@ def _set_state( other_states=other_states, combiner=combiner, ) + if combiner: + if not_split := [ + c + for c in combiner + if not any(c in s for s in self.state.splitter_rpn) + ]: + raise ValueError( + f"Combiner fields {not_split} for Node {self.name!r} are not in the " + f"splitter fields {self.state.splitter_rpn}" + ) + else: + self._state = None def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: """Get the states of the upstream nodes that are connected to this node""" From 0e0a02d0db7be3038e8fd5487d5d523f2bd733c1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 29 Jan 2025 20:41:27 +1100 Subject: [PATCH 156/342] changed under_construction so it investigates the call stack instead of using a class variable --- pydra/design/workflow.py | 2 +- pydra/engine/core.py | 
103 ++++++++++++++++++++------------------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index e206aa4300..928ef4e879 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -200,7 +200,7 @@ def this() -> "Workflow": """ from pydra.engine.core import Workflow - return Workflow.under_construction + return Workflow.under_construction() OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 7213881d50..12c2c96965 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -477,7 +477,7 @@ def _combined_output(self, return_inputs=False): return None if return_inputs is True or return_inputs == "val": result = (self.state.states_val[ind], result) - elif return_inputs == "ind": + elif return_inputs is True or return_inputs == "ind": result = (self.state.states_ind[ind], result) combined_results_gr.append(result) combined_results.append(combined_results_gr) @@ -637,7 +637,7 @@ def construct( # Initialise the lzin fields lazy_spec = copy(definition) - wf = cls.under_construction = Workflow( + workflow = Workflow( name=type(definition).__name__, inputs=lazy_spec, outputs=outputs, @@ -647,7 +647,7 @@ def construct( lazy_spec, lzy_inpt.name, LazyInField( - workflow=wf, + workflow=workflow, field=lzy_inpt.name, type=lzy_inpt.type, ), @@ -655,43 +655,58 @@ def construct( input_values = attrs_values(lazy_spec) constructor = input_values.pop("constructor") - cls._under_construction = wf - try: - # Call the user defined constructor to set the outputs - output_lazy_fields = constructor(**input_values) - # Check to see whether any mandatory inputs are not set - for node in wf.nodes: - node._definition._check_rules() - # Check that the outputs are set correctly, either directly by the constructor - # or via returned values that can be zipped with the output names - if output_lazy_fields: - if not isinstance(output_lazy_fields, (list, tuple)): - output_lazy_fields = [output_lazy_fields] - output_fields = list_fields(definition.Outputs) - if len(output_lazy_fields) != len(output_fields): - raise ValueError( - f"Expected {len(output_fields)} outputs, got " - f"{len(output_lazy_fields)} ({output_lazy_fields})" - ) - for outpt, outpt_lf in zip(output_fields, output_lazy_fields): - # Automatically combine any uncombined state arrays into lists - if TypeParser.get_origin(outpt_lf.type) is StateArray: - outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] - setattr(outputs, outpt.name, outpt_lf) - else: - if unset_outputs := [ - a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING - ]: - raise ValueError( - f"Expected outputs {unset_outputs} to be set by the " - f"constructor of {wf!r}" - ) - finally: - cls._under_construction = None + # Call the user defined constructor to set the outputs + output_lazy_fields = constructor(**input_values) + # Check to see whether any mandatory inputs are not set + for node in workflow.nodes: + node._definition._check_rules() + # Check that the outputs are set correctly, either directly by the constructor + # or via returned values that can be zipped with the output names + if output_lazy_fields: + if not isinstance(output_lazy_fields, (list, tuple)): + output_lazy_fields = [output_lazy_fields] + output_fields = list_fields(definition.Outputs) + if len(output_lazy_fields) != len(output_fields): + raise ValueError( + f"Expected {len(output_fields)} outputs, got " + f"{len(output_lazy_fields)} 
({output_lazy_fields})" + ) + for outpt, outpt_lf in zip(output_fields, output_lazy_fields): + # Automatically combine any uncombined state arrays into lists + if TypeParser.get_origin(outpt_lf.type) is StateArray: + outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] + setattr(outputs, outpt.name, outpt_lf) + else: + if unset_outputs := [ + a for a, v in attrs_values(outputs).items() if v is attrs.NOTHING + ]: + raise ValueError( + f"Expected outputs {unset_outputs} to be set by the " + f"constructor of {workflow!r}" + ) - cls._constructed[hash_key] = wf + cls._constructed[hash_key] = workflow - return wf + return workflow + + @classmethod + def under_construction(cls) -> "Workflow[ty.Any]": + """Access the under_construction variable by iterating up through the call stack.""" + frame = inspect.currentframe() + while frame: + # Find the frame where the construct method was called + if ( + frame.f_code.co_name == "construct" + and "cls" in frame.f_locals + and frame.f_locals["cls"] is cls + and "workflow" in frame.f_locals + ): + return frame.f_locals["workflow"] # local var "workflow" in construct + frame = frame.f_back + raise RuntimeError( + "No workflow is currently under construction (i.e. did not find a " + "`Workflow.construct` in the current call stack" + ) @classmethod def clear_cache(cls): @@ -733,18 +748,6 @@ def nodes(self) -> ty.Iterable[Node]: def node_names(self) -> list[str]: return list(self._nodes) - @property - @classmethod - def under_construction(cls) -> "Workflow[ty.Any]": - if cls._under_construction is None: - raise ValueError( - "pydra.design.workflow.this() can only be called from within a workflow " - "constructor function (see 'pydra.design.workflow.define')" - ) - return cls._under_construction - - # Used to store the workflow that is currently being constructed - _under_construction: "Workflow[ty.Any]" = None # Used to cache the constructed workflows by their hashed input values _constructed: dict[int, "Workflow[ty.Any]"] = {} From 9c11addc023ae363603dba74b9933dcba9c6b4f5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 12:18:39 +1100 Subject: [PATCH 157/342] implemented AST based function bytes_repr --- pydra/utils/hash.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index a836eaddf3..89e4919070 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -2,6 +2,8 @@ import sys import os +import re +import ast import struct import inspect from datetime import datetime @@ -550,7 +552,6 @@ def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: obj.co_filename, obj.co_freevars, obj.co_name, - obj.co_firstlineno, obj.co_lnotab, obj.co_cellvars, ), @@ -562,9 +563,30 @@ def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: @register_serializer def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes]: """Serialize a function, attempting to use the AST of the source code if available - otherwise falling back to using cloudpickle to serialize the byte-code of the - function.""" - yield from bytes_repr(obj.__code__, cache) + otherwise falling back to the byte-code of the function.""" + yield b"function:(" + try: + src = inspect.getsource(obj) + except OSError: + # Fallback to using the bytes representation of the code object + yield from bytes_repr(obj.__code__, cache) + else: + + def dump_ast(node: ast.AST) -> bytes: + return ast.dump( + node, annotate_fields=False, 
include_attributes=False + ).encode() + + indent = re.match(r"(\s*)", src).group(1) + if indent: + src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) + func_ast = ast.parse(src).body[0] + yield dump_ast(func_ast.args) + if func_ast.returns: + yield dump_ast(func_ast.returns) + for stmt in func_ast.body: + yield dump_ast(stmt) + yield b")" def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]: From b8ba7f858c6dab4b5aee60be840073a26228dbb2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 12:23:07 +1100 Subject: [PATCH 158/342] trimmed some unnecessary attributes from the byte-code based function hash --- pydra/utils/hash.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 89e4919070..7e9328cdd0 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -543,16 +543,13 @@ def bytes_repr_code(obj: types.CodeType, cache: Cache) -> Iterator[bytes]: obj.co_posonlyargcount, obj.co_kwonlyargcount, obj.co_nlocals, - obj.co_stacksize, obj.co_flags, obj.co_code, obj.co_consts, obj.co_names, obj.co_varnames, - obj.co_filename, obj.co_freevars, obj.co_name, - obj.co_lnotab, obj.co_cellvars, ), cache, From 1cacaa056c3daa50a0b061b2e0e311191562d36a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 12:23:40 +1100 Subject: [PATCH 159/342] debugging test_functions --- pydra/engine/tests/test_functions.py | 56 ++++++++++++++++------------ 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/pydra/engine/tests/test_functions.py b/pydra/engine/tests/test_functions.py index ee49c6d269..a9474eb263 100644 --- a/pydra/engine/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -1,25 +1,31 @@ import pytest import random import typing as ty - +import inspect +import re +import ast from pydra.design import python -from pydra.engine.helpers import list_fields +from pydra.engine.specs import PythonDef, PythonOutputs +from pydra.engine.helpers import list_fields, attrs_values +from pydra.utils.hash import bytes_repr def test_task_equivalence(): - def add_two(a): + """testing equivalence of tasks created in different ways""" + + def add_two(a: int) -> int: return a + 2 @python.define - class Canonical: + class Canonical(PythonDef["Canonical.Outputs"]): a: ty.Any - class Outputs: + class Outputs(PythonOutputs): out: ty.Any @staticmethod - def function(a): + def function(a: int) -> int: return a + 2 canonical = Canonical(a=3) @@ -27,19 +33,20 @@ def function(a): decorated1 = python.define(add_two)(a=3) @python.define - def addtwo(a): + def addtwo(a: int) -> int: return a + 2 decorated2 = addtwo(a=3) - assert canonical.checksum == decorated1.checksum + assert canonical._compute_hashes()[1] == decorated1._compute_hashes()[1] + assert canonical._compute_hashes()[1] == decorated2._compute_hashes()[1] - c_res = canonical._run() - d1_res = decorated1._run() - d2_res = decorated2._run() + c_outputs = canonical() + d1_outputs = decorated1() + d2_outputs = decorated2() - assert c_res.output.hash == d1_res.output.hash - assert c_res.output.hash == d2_res.output.hash + assert attrs_values(c_outputs) == attrs_values(d1_outputs) + assert attrs_values(c_outputs) == attrs_values(d2_outputs) def test_annotation_equivalence_1(): @@ -48,26 +55,29 @@ def test_annotation_equivalence_1(): def direct(a: int) -> int: return a + 2 + Direct = python.define(direct) + @python.define(outputs={"out": int}) - def partial(a: int): + def Partial(a: int): return a + 2 - @python.define(inputs={"a": int}, 
outputs={"return": int}) - def indirect(a): + @python.define(inputs={"a": int}, outputs={"out": int}) + def Indirect(a): return a + 2 - # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ + assert list_fields(Direct) == list_fields(Partial) + assert list_fields(Direct) == list_fields(Indirect) + + assert list_fields(Direct.Outputs) == list_fields(Partial.Outputs) + assert list_fields(Direct.Outputs) == list_fields(Indirect.Outputs) # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) + assert attrs_values(Direct(a)) == attrs_values(Partial(a)) + assert attrs_values(Direct(a)) == attrs_values(Indirect(a)) # checking if the annotation is properly converted to output_spec if used in task - task_direct = python.define(direct)() - assert list_fields(task_direct.Outputs)[0] == python.out(name="out", type=int) + assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int) def test_annotation_equivalence_2(): From 2da62fda26b0156aa0f5cd18b34126b55109230b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 12:23:50 +1100 Subject: [PATCH 160/342] debugging test_helpers_file --- pydra/design/base.py | 10 ++++ pydra/design/tests/test_shell.py | 1 + pydra/engine/tests/test_helpers_file.py | 75 +++++++++---------------- 3 files changed, 39 insertions(+), 47 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 57828ec56a..8b45248d54 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -410,6 +410,11 @@ def make_task_def( """ spec_type._check_arg_refs(inputs, outputs) + for inpt in inputs.values(): + set_none_default_if_optional(inpt) + for outpt in inputs.values(): + set_none_default_if_optional(outpt) + if name is None and klass is not None: name = klass.__name__ if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: @@ -981,4 +986,9 @@ def nothing_factory(): return attrs.NOTHING +def set_none_default_if_optional(field: Field) -> None: + if is_optional(field.type) and field.default is EMPTY: + field.default = None + + white_space_re = re.compile(r"\s+") diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 31a5943d87..0fc5ff0170 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -435,6 +435,7 @@ def test_interface_template_with_type_overrides(): name="int_arg", argstr="--int-arg", type=int | None, + default=None, position=5, ), shell.arg( diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 2d9d97cbd3..ab3f0f96f9 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,12 +1,11 @@ import typing as ty import sys from pathlib import Path -import attr from unittest.mock import Mock import pytest from fileformats.generic import File -from ..specs import ShellDef -from ..task import ShellTask +from pydra.engine.specs import ShellDef, ShellOutputs +from pydra.design import shell from ..helpers_file import ( ensure_list, MountIndentifier, @@ -354,52 +353,34 @@ def test_output_template(tmp_path): filename = str(tmp_path / "file.txt") with open(filename, "w") as f: f.write("hello from pydra") - in_file = File(filename) - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - 
"position": 1, - "argstr": "", - "help": "input file", - }, - ), - ), - ( - "optional", - attr.ib( - type=ty.Union[Path, bool], - default=False, - metadata={ - "position": 2, - "argstr": "--opt", - "output_file_template": "{in_file}.out", - "help": "optional file output", - }, - ), - ), - ], - bases=(ShellDef,), - ) - class MyCommand(ShellTask): + @shell.define + class MyCommand(ShellDef["MyCommand.Outputs"]): + in_file: File = shell.arg( + position=1, + argstr="", + help="input file", + ) + optional: File | None = shell.outarg( + position=2, + argstr="--opt", + path_template="{in_file}.out", + help="optional file output", + ) + + class Outputs(ShellOutputs): + pass + executable = "my" - input_spec = my_input_spec - - task = MyCommand(in_file=filename) - assert task.cmdline == f"my {filename}" - task.definition.optional = True - assert task.cmdline == f"my {filename} --opt {task.output_dir / 'file.out'}" - task.definition.optional = False - assert task.cmdline == f"my {filename}" - task.definition.optional = "custom-file-out.txt" - assert task.cmdline == f"my {filename} --opt custom-file-out.txt" + + defn = MyCommand(in_file=filename) + assert defn.cmdline == f"my {filename}" + defn.optional = True + assert defn.cmdline == f"my {filename} --opt 'file.out'" + defn.optional = False + assert defn.cmdline == f"my {filename}" + defn.optional = "custom-file-out.txt" + assert defn.cmdline == f"my {filename} --opt custom-file-out.txt" def test_template_formatting(tmp_path): From ff8069fabd9161e8ae6d7922b09be783e51f9b20 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 14:25:32 +1100 Subject: [PATCH 161/342] debugged test_helpers_file --- pydra/design/shell.py | 2 +- pydra/engine/helpers_file.py | 9 +++---- pydra/engine/specs.py | 31 ++++++------------------- pydra/engine/tests/test_helpers_file.py | 13 +++++++---- 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index f958b90869..08432ffeb2 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -669,7 +669,7 @@ def remaining_positions( If multiple fields have the same position """ if num_args is None: - num_args = len(args) + num_args = len(args) - 1 # Subtract 1 for the 'additional_args' field # Check for multiple positions positions = defaultdict(list) for arg in args: diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 46884573d8..97c35a5da4 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -119,10 +119,9 @@ def template_update( for field in list_fields(definition) if isinstance(field, shell.outarg) and field.path_template - and getattr(definition, field.name) is not False + and getattr(definition, field.name) and all( - getattr(definition, required_field) is not None - for required_field in field.requires + getattr(definition, required_field) for required_field in field.requires ) ] @@ -164,7 +163,7 @@ def template_update_single( raise TypeError( f"type of '{field.name}' is Path, consider using Union[Path, bool]" ) - if inp_val_set is not attr.NOTHING and not is_lazy(inp_val_set): + if inp_val_set is not None and not is_lazy(inp_val_set): inp_val_set = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(inp_val_set) elif spec_type == "output": if not TypeParser.contains_type(FileSet, field.type): @@ -252,6 +251,8 @@ def _string_template_formatting(field, template, definition, input_values): if fld_name not in input_values: raise AttributeError(f"{fld_name} is not provided in the input") fld_value = 
input_values[fld_name] + if isinstance(fld_value, Path): # Remove path + fld_value = fld_value.name if fld_value is attr.NOTHING: # if value is NOTHING, nothing should be added to the command return attr.NOTHING diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 0ac70438d4..d20e524409 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -945,27 +945,14 @@ def _command_args( pos_args.append(self._command_shelltask_executable(field, value)) elif name == "additional_args": continue - elif name == "args": - pos_val = self._command_shelltask_args(field, value) - if pos_val: - pos_args.append(pos_val) else: - if name in modified_inputs: - pos_val = self._command_pos_args( - field=field, - value=value, - inputs=inputs, - root=root, - output_dir=output_dir, - ) - else: - pos_val = self._command_pos_args( - field=field, - value=value, - output_dir=output_dir, - inputs=inputs, - root=root, - ) + pos_val = self._command_pos_args( + field=field, + value=value, + inputs=inputs, + root=root, + output_dir=output_dir, + ) if pos_val: pos_args.append(pos_val) @@ -1024,10 +1011,6 @@ def _command_pos_args( self._positions_provided.append(field.position) - # Shift non-negatives up to allow executable to be 0 - # Shift negatives down to allow args to be -1 - field.position += 1 if field.position >= 0 else -1 - if value and isinstance(value, str): if root: # values from templates value = value.replace(str(output_dir), f"{root}{output_dir}") diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index ab3f0f96f9..65417b9efe 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,12 +1,14 @@ import typing as ty import sys +import os from pathlib import Path from unittest.mock import Mock import pytest from fileformats.generic import File from pydra.engine.specs import ShellDef, ShellOutputs from pydra.design import shell -from ..helpers_file import ( +from pydra.engine.helpers import list_fields +from pydra.engine.helpers_file import ( ensure_list, MountIndentifier, copy_nested_files, @@ -376,18 +378,21 @@ class Outputs(ShellOutputs): defn = MyCommand(in_file=filename) assert defn.cmdline == f"my {filename}" defn.optional = True - assert defn.cmdline == f"my {filename} --opt 'file.out'" + file_out_path = os.path.join(os.getcwd(), "file.out") + if " " in file_out_path: + file_out_path = f"'{file_out_path}'" + assert defn.cmdline == f"my {filename} --opt {file_out_path}" defn.optional = False assert defn.cmdline == f"my {filename}" defn.optional = "custom-file-out.txt" assert defn.cmdline == f"my {filename} --opt custom-file-out.txt" -def test_template_formatting(tmp_path): +def test_template_formatting(tmp_path: Path): field = Mock() field.name = "grad" field.argstr = "--grad" - field.metadata = {"output_file_template": ("{in_file}.bvec", "{in_file}.bval")} + field.path_template = ("{in_file}.bvec", "{in_file}.bval") inputs = Mock() inputs_dict = {"in_file": "/a/b/c/file.txt", "grad": True} From 290bdcac97dc88047e9a1e648e014c2b177a6fe4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 15:45:34 +1100 Subject: [PATCH 162/342] renamed test tasks and workflows to PascalCase --- pydra/design/base.py | 2 + pydra/engine/tests/test_functions.py | 105 +-- pydra/engine/tests/test_helpers.py | 12 +- pydra/engine/tests/test_node_task.py | 182 +++--- pydra/engine/tests/test_numpy_examples.py | 6 +- pydra/engine/tests/test_specs.py | 4 +- pydra/engine/tests/test_submitter.py | 36 +- 
pydra/engine/tests/test_task.py | 4 +- pydra/engine/tests/test_workflow.py | 756 +++++++++++----------- pydra/engine/tests/utils.py | 151 ++--- pydra/utils/hash.py | 14 +- pytest.ini | 2 + 12 files changed, 625 insertions(+), 649 deletions(-) create mode 100644 pytest.ini diff --git a/pydra/design/base.py b/pydra/design/base.py index 8b45248d54..a1f5d4d6cf 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -56,6 +56,8 @@ def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: """Ensure the default value has been coerced into the correct type""" if value is EMPTY or isinstance(value, attrs.Factory): return value + if self_.type is ty.Callable and isinstance(value, ty.Callable): + return value return TypeParser[self_.type](self_.type, label=self_.name)(value) diff --git a/pydra/engine/tests/test_functions.py b/pydra/engine/tests/test_functions.py index a9474eb263..445fdf6951 100644 --- a/pydra/engine/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -1,13 +1,22 @@ import pytest import random import typing as ty -import inspect -import re -import ast +from pydra.design.base import Field from pydra.design import python from pydra.engine.specs import PythonDef, PythonOutputs from pydra.engine.helpers import list_fields, attrs_values -from pydra.utils.hash import bytes_repr + + +def non_func_fields(defn: PythonDef) -> list[Field]: + return [f for f in list_fields(defn) if f.name != "function"] + + +def non_func_values(defn: PythonDef) -> dict: + return {n: v for n, v in attrs_values(defn).items() if n != "function"} + + +def hashes(defn: PythonDef) -> dict[str, str]: + return defn._compute_hashes()[1] def test_task_equivalence(): @@ -45,8 +54,11 @@ def addtwo(a: int) -> int: d1_outputs = decorated1() d2_outputs = decorated2() - assert attrs_values(c_outputs) == attrs_values(d1_outputs) - assert attrs_values(c_outputs) == attrs_values(d2_outputs) + assert ( + non_func_values(c_outputs) + == non_func_values(d1_outputs) + == non_func_values(d2_outputs) + ) def test_annotation_equivalence_1(): @@ -65,113 +77,116 @@ def Partial(a: int): def Indirect(a): return a + 2 - assert list_fields(Direct) == list_fields(Partial) - assert list_fields(Direct) == list_fields(Indirect) + assert non_func_fields(Direct) == non_func_fields(Partial) + assert non_func_fields(Direct) == non_func_fields(Indirect) assert list_fields(Direct.Outputs) == list_fields(Partial.Outputs) assert list_fields(Direct.Outputs) == list_fields(Indirect.Outputs) # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert attrs_values(Direct(a)) == attrs_values(Partial(a)) - assert attrs_values(Direct(a)) == attrs_values(Indirect(a)) + assert non_func_values(Direct(a=a)) == non_func_values(Partial(a=a)) + assert non_func_values(Direct(a=a)) == non_func_values(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task - assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int) + assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int, order=0) def test_annotation_equivalence_2(): """testing various ways of annotation: multiple outputs, using a tuple for output annot.""" - def direct(a: int) -> (int, float): + def direct(a: int) -> tuple[int, float]: return a + 2, a + 2.0 - @python.define(outputs={"out": (int, float)}) - def partial(a: int): + Direct = python.define(direct, outputs=["out1", "out2"]) + + @python.define(outputs={"out1": int, "out2": float}) + def Partial(a: int): return a + 2, a 
+ 2.0 - @python.define(inputs={"a": int}) - def indirect(a) -> tuple[int, float]: + @python.define(inputs={"a": int}, outputs=["out1", "out2"]) + def Indirect(a) -> tuple[int, float]: return a + 2, a + 2.0 # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ + assert ( + non_func_fields(Direct) == non_func_fields(Partial) == non_func_fields(Indirect) + ) # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) + assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task - task_direct = python.define(direct)() - assert task_direct.output_spec.fields == [("out1", int), ("out2", float)] + assert list_fields(Direct.Outputs) == [ + python.out(name="out1", type=int, order=0), + python.out(name="out2", type=float, order=1), + ] def test_annotation_equivalence_3(): """testing various ways of annotation: using dictionary for output annot.""" - @python.define(outputs=["out1"]) def direct(a: int) -> int: return a + 2 - @python.define(inputs={"return": {"out1": int}}) - def partial(a: int): + Direct = python.define(direct, outputs=["out1"]) + + @python.define(outputs={"out1": int}) + def Partial(a: int): return a + 2 @python.define(inputs={"a": int}, outputs={"out1": int}) - def indirect(a): + def Indirect(a): return a + 2 # checking if the annotations are equivalent - assert direct.__annotations__ == partial.__annotations__ - assert direct.__annotations__ == indirect.__annotations__ + assert ( + non_func_fields(Direct) == non_func_fields(Partial) == non_func_fields(Indirect) + ) # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) + assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task - task_direct = python.define(direct)() - assert task_direct.output_spec.fields[0] == ("out1", int) + assert list_fields(Direct.Outputs)[0] == python.out(name="out1", type=int, order=0) def test_annotation_equivalence_4(): """testing various ways of annotation: using ty.NamedTuple for the output""" @python.define(outputs=["sum", "sub"]) - def direct(a: int) -> tuple[int, int]: + def Direct(a: int) -> tuple[int, int]: return a + 2, a - 2 @python.define(outputs={"sum": int, "sub": int}) - def partial(a: int): + def Partial(a: int): return a + 2, a - 2 @python.define(inputs={"a": int}, outputs={"sum": int, "sub": int}) - def indirect(a): + def Indirect(a): return a + 2, a - 2 # checking if the annotations are equivalent assert ( - direct.__annotations__["return"].__annotations__ - == partial.__annotations__["return"].__annotations__ - == indirect.__annotations__["return"].__annotations__ + list_fields(Direct.Outputs) + == list_fields(Partial.Outputs) + == list_fields(Indirect.Outputs) ) assert ( - direct.__annotations__["return"].__name__ - == partial.__annotations__["return"].__name__ - == indirect.__annotations__["return"].__name__ + list_fields(Direct.Outputs) + == list_fields(Partial.Outputs) + == list_fields(Indirect.Outputs) ) # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert direct(a) == partial(a) - assert direct(a) == indirect(a) + assert Direct(a=a) == Partial(a=a) + 
assert Direct(a=a) == Indirect(a=a) # checking if the annotation is properly converted to output_spec if used in task - task_direct = python.define(direct)() - assert list_fields(task_direct.Outputs) == [ + assert list_fields(Direct.Outputs) == [ python.arg(name="sum", type=int), python.arg(name="sub", type=int), ] diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 14bb1203ad..78c726e082 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -9,7 +9,7 @@ from unittest.mock import Mock from fileformats.generic import Directory, File from fileformats.core import FileSet -from .utils import multiply, raise_xeq1 +from .utils import Multiply, RaiseXeq1 from ..helpers import ( get_available_cpus, save, @@ -24,7 +24,7 @@ def test_save(tmpdir): outdir = Path(tmpdir) with pytest.raises(ValueError): save(tmpdir) - foo = multiply(name="mult", x=1, y=2) + foo = Multiply(name="mult", x=1, y=2) # save task save(outdir, task=foo) del foo @@ -177,7 +177,7 @@ def test_load_and_run(tmpdir): """testing load_and_run for pickled task""" task_pkl = Path(tmpdir.join("task_main.pkl")) - task = multiply(name="mult", y=10).split(x=[1, 2]) + task = Multiply(name="mult", y=10).split(x=[1, 2]) task.state.prepare_states(inputs=task.inputs) task.state.prepare_inputs() with task_pkl.open("wb") as fp: @@ -195,7 +195,7 @@ def test_load_and_run(tmpdir): def test_load_and_run_exception_load(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) - raise_xeq1(name="raise").split("x", x=[1, 2]) + RaiseXeq1(name="raise").split("x", x=[1, 2]) with pytest.raises(FileNotFoundError): load_and_run(task_pkl=task_pkl, ind=0) @@ -204,7 +204,7 @@ def test_load_and_run_exception_run(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) - task = raise_xeq1(name="raise").split("x", x=[1, 2]) + task = RaiseXeq1(name="raise").split("x", x=[1, 2]) task.state.prepare_states(inputs=task.inputs) task.state.prepare_inputs() @@ -236,7 +236,7 @@ def test_load_and_run_wf(tmpdir): wf_pkl = Path(tmpdir.join("wf_main.pkl")) wf = Workflow(name="wf", input_spec=["x", "y"], y=10) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.mult.lzout.out)]) diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 21f008fabb..85c67eb8dd 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -12,16 +12,16 @@ from pydra.design import python from .utils import ( - fun_addtwo, - fun_addvar, - fun_addvar_none, - fun_addvar_default, - moment, - fun_div, - fun_dict, - fun_file, - fun_file_list, - op_4var, + FunAddTwo, + FunAddVar, + FunAddVarNone, + FunAddVarDefault, + Moment, + FunDiv, + FunDict, + FunFile, + FunFileList, + Op4Var, ) from ..core import Task @@ -45,7 +45,7 @@ def move2orig(): # Tests for tasks initializations def test_task_init_1(): """task with mandatory arguments only""" - nn = fun_addtwo() + nn = FunAddTwo() assert isinstance(nn, Task) assert nn.name == "fun_addtwo" assert hasattr(nn, "__call__") @@ -53,12 +53,12 @@ def test_task_init_1(): def test_task_init_1a(): with pytest.raises(TypeError): - fun_addtwo("NA") + FunAddTwo("NA") def test_task_init_2(): """task with a name and inputs""" - nn = fun_addtwo(name="NA", a=3) + 
nn = FunAddTwo(name="NA", a=3) # adding NA to the name of the variable assert getattr(nn.inputs, "a") == 3 assert nn.state is None @@ -77,7 +77,7 @@ def test_task_init_3( if input_type == "array": a_in = np.array(a_in) - nn = fun_addtwo(name="NA").split(splitter=splitter, a=a_in) + nn = FunAddTwo(name="NA").split(splitter=splitter, a=a_in) assert np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == state_splitter @@ -127,7 +127,7 @@ def test_task_init_3a( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) + nn = FunAddVar(name="NA").split(splitter=splitter, a=a_in, b=b_in) assert np.allclose(nn.inputs.a, [3, 5]) assert np.allclose(nn.inputs.b, [10, 20]) @@ -141,7 +141,7 @@ def test_task_init_3a( def test_task_init_4(): """task with interface splitter and inputs set in the split method""" - nn = fun_addtwo(name="NA") + nn = FunAddTwo(name="NA") nn.split(splitter="a", a=[3, 5]) assert np.allclose(nn.inputs.a, [3, 5]) @@ -155,7 +155,7 @@ def test_task_init_4(): def test_task_init_4b(): """updating splitter using overwrite=True""" - nn = fun_addtwo(name="NA") + nn = FunAddTwo(name="NA") nn.split(splitter="a", a=[1, 2]) nn.split(splitter="a", a=[3, 5], overwrite=True) assert np.allclose(nn.inputs.a, [3, 5]) @@ -170,7 +170,7 @@ def test_task_init_4b(): def test_task_init_4c(): """trying to set splitter twice without using overwrite""" - nn = fun_addvar(name="NA").split(splitter="b", b=[1, 2]) + nn = FunAddVar(name="NA").split(splitter="b", b=[1, 2]) with pytest.raises(Exception) as excinfo: nn.split(splitter="a", a=[3, 5]) assert "splitter has been already set" in str(excinfo.value) @@ -182,7 +182,7 @@ def test_task_init_4d(): """trying to set the same splitter twice without using overwrite if the splitter is the same, the exception shouldn't be raised """ - nn = fun_addtwo(name="NA").split(splitter="a", a=[3, 5]) + nn = FunAddTwo(name="NA").split(splitter="a", a=[3, 5]) nn.split(splitter="a", a=[3, 5]) assert nn.state.splitter == "NA.a" @@ -190,9 +190,7 @@ def test_task_init_4d(): def test_task_init_5(): """task with inputs, splitter and combiner""" nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") + FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") ) assert nn.state.splitter == ["NA.a", "NA.b"] @@ -222,9 +220,7 @@ def test_task_init_5(): def test_task_init_5a(): """updating combiner using overwrite=True""" nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") + FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") ) nn.combine("a", overwrite=True) @@ -255,9 +251,7 @@ def test_task_init_5a(): def test_task_init_5b(): """updating combiner without using overwrite""" nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") + FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") ) with pytest.raises(Exception) as excinfo: nn.combine("a") @@ -271,9 +265,7 @@ def test_task_init_5c(): if the combiner is the same, the exception shouldn't be raised """ nn = ( - fun_addvar(name="NA") - .split(splitter=["a", "b"], a=[3, 5], b=[1, 2]) - .combine("b") + FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") ) nn.combine("b") @@ -287,7 +279,7 @@ def test_task_init_5c(): def test_task_init_6(): """task with splitter, but the input is an empty list""" - 
nn = fun_addtwo(name="NA") + nn = FunAddTwo(name="NA") nn.split(splitter="a", a=[]) assert nn.inputs.a == [] @@ -309,7 +301,7 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn1 = fun_file_list(name="NA", filename_list=[file1, file2]) + nn1 = FunFileList(name="NA", filename_list=[file1, file2]) output_dir1 = nn1.output_dir # changing the content of the file @@ -318,7 +310,7 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra") - nn2 = fun_file_list(name="NA", filename_list=[file1, file2]) + nn2 = FunFileList(name="NA", filename_list=[file1, file2]) output_dir2 = nn2.output_dir # the checksum should be different - content of file2 is different @@ -327,23 +319,23 @@ def test_task_init_7(tmp_path): def test_task_init_8(): """task without setting the input, the value should be set to attr.NOTHING""" - nn = fun_addtwo(name="NA") + nn = FunAddTwo(name="NA") assert nn.inputs.a is attr.NOTHING def test_task_init_9(): """task without setting the input, but using the default avlue from function""" - nn1 = fun_addvar_default(name="NA", a=2) + nn1 = FunAddVarDefault(name="NA", a=2) assert nn1.inputs.b == 1 - nn2 = fun_addvar_default(name="NA", a=2, b=1) + nn2 = FunAddVarDefault(name="NA", a=2, b=1) assert nn2.inputs.b == 1 # both tasks should have the same checksum assert nn1.checksum == nn2.checksum def test_task_error(): - func = fun_div(name="div", a=1, b=0) + func = FunDiv(name="div", a=1, b=0) with pytest.raises(ZeroDivisionError): func() assert (func.output_dir / "_error.pklz").exists() @@ -353,7 +345,7 @@ def test_odir_init(): """checking if output_dir is available for a task without init before running the task """ - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(name="NA", a=3) assert nn.output_dir @@ -363,7 +355,7 @@ def test_odir_init(): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(name="NA", a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -392,7 +384,7 @@ def test_task_nostate_1(plugin_dask_opt, tmp_path): def test_task_nostate_1_call(): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(name="NA", a=3) nn() # checking the results results = nn.result() @@ -404,7 +396,7 @@ def test_task_nostate_1_call(): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(name="NA", a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -422,7 +414,7 @@ def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): """task without splitter""" - nn = fun_addtwo(name="NA", a=3) + nn = FunAddTwo(name="NA", a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -438,7 +430,7 @@ def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): def test_task_nostate_1_call_updateinp(): """task without splitter""" - nn = fun_addtwo(name="NA", a=30) + nn = FunAddTwo(name="NA", a=30) # updating input when calling the node nn(a=3) @@ -451,7 +443,7 @@ def test_task_nostate_1_call_updateinp(): def test_task_nostate_2(plugin, tmp_path): """task with a list as an input, but no splitter""" - nn = moment(name="NA", n=3, lst=[2, 3, 4]) + nn = 
Moment(name="NA", n=3, lst=[2, 3, 4]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [3]) assert np.allclose(nn.inputs.lst, [2, 3, 4]) @@ -469,7 +461,7 @@ def test_task_nostate_2(plugin, tmp_path): def test_task_nostate_3(plugin, tmp_path): """task with a dictionary as an input""" - nn = fun_dict(name="NA", d={"a": "ala", "b": "bala"}) + nn = FunDict(name="NA", d={"a": "ala", "b": "bala"}) nn.cache_dir = tmp_path assert nn.inputs.d == {"a": "ala", "b": "bala"} @@ -489,7 +481,7 @@ def test_task_nostate_4(plugin, tmp_path): with open(file1, "w") as f: f.write("hello from pydra\n") - nn = fun_file(name="NA", filename=file1) + nn = FunFile(name="NA", filename=file1) nn.cache_dir = tmp_path with Submitter(plugin) as sub: @@ -512,7 +504,7 @@ def test_task_nostate_5(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn = fun_file_list(name="NA", filename_list=[file1, file2]) + nn = FunFileList(name="NA", filename_list=[file1, file2]) nn() @@ -525,7 +517,7 @@ def test_task_nostate_5(tmp_path): def test_task_nostate_6(): """checking if the function gets the None value""" - nn = fun_addvar_none(name="NA", a=2, b=None) + nn = FunAddVarNone(name="NA", a=2, b=None) assert nn.inputs.b is None nn() assert nn.result().output.out == 2 @@ -533,7 +525,7 @@ def test_task_nostate_6(): def test_task_nostate_6a_exception(): """checking if the function gets the attr.Nothing value""" - nn = fun_addvar_none(name="NA", a=2) + nn = FunAddVarNone(name="NA", a=2) assert nn.inputs.b is attr.NOTHING with pytest.raises(TypeError) as excinfo: nn() @@ -542,7 +534,7 @@ def test_task_nostate_6a_exception(): def test_task_nostate_7(): """using the default value from the function for b input""" - nn = fun_addvar_default(name="NA", a=2) + nn = FunAddVarDefault(name="NA", a=2) assert nn.inputs.b == 1 nn() assert nn.result().output.out == 3 @@ -556,7 +548,7 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): """task with provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -575,7 +567,7 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): cache_dir = "test_task_nostate" (tmp_path / cache_dir).mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -600,11 +592,11 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn2) @@ -628,11 +620,11 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, 
cache_locations=cache_dir) with Submitter(worker=plugin) as sub: sub(nn2, rerun=True) @@ -655,10 +647,10 @@ def test_task_nostate_cachelocations_nosubmitter(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) nn() - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) nn2() # checking the results @@ -681,10 +673,10 @@ def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) nn() - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) nn2(rerun=True) # checking the results @@ -710,11 +702,11 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = fun_addtwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) # updating cache location to non-existing dir with Submitter(worker=plugin) as sub: sub(nn2, cache_locations=cache_dir1) @@ -739,7 +731,7 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): if input_type == "array": a_in = np.array(a_in) - nn = fun_addtwo(name="NA").split(splitter="a", a=a_in) + nn = FunAddTwo(name="NA").split(splitter="a", a=a_in) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" @@ -778,7 +770,7 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): def test_task_state_1a(plugin, tmp_path): """task with the simplest splitter (inputs set separately)""" - nn = fun_addtwo(name="NA") + nn = FunAddTwo(name="NA") nn.split(splitter="a", a=[1, 2]) nn.inputs.a = StateArray([3, 5]) nn.cache_dir = tmp_path @@ -801,7 +793,7 @@ def test_task_state_singl_1(plugin, tmp_path): """Tasks with two inputs and a splitter (no combiner) one input is a single value, the other is in the splitter and combiner """ - nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10) + nn = FunAddVar(name="NA").split(splitter="a", a=[3, 5], b=10) nn.cache_dir = tmp_path assert nn.inputs.a == [3, 5] @@ -871,7 +863,7 @@ def test_task_state_2( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) + nn = FunAddVar(name="NA").split(splitter=splitter, a=a_in, b=b_in) nn.cache_dir = tmp_path assert (nn.inputs.a == np.array([3, 5])).all() @@ -911,7 +903,7 @@ def test_task_state_2( def test_task_state_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = fun_addtwo(name="NA").split(splitter="a", a=[]) + nn = FunAddTwo(name="NA").split(splitter="a", a=[]) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" @@ -936,7 +928,7 @@ def test_task_state_4(plugin, input_type, tmp_path): lst_in = [[2, 3, 4], [1, 2, 3]] if input_type == "array": lst_in = np.array(lst_in, dtype=int) - nn = moment(name="NA", n=3).split(splitter="lst", lst=lst_in) + nn = Moment(name="NA", 
n=3).split(splitter="lst", lst=lst_in) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) @@ -965,7 +957,7 @@ def test_task_state_4(plugin, input_type, tmp_path): def test_task_state_4a(plugin, tmp_path): """task with a tuple as an input, and a simple splitter""" - nn = moment(name="NA", n=3).split(splitter="lst", lst=[(2, 3, 4), (1, 2, 3)]) + nn = Moment(name="NA", n=3).split(splitter="lst", lst=[(2, 3, 4), (1, 2, 3)]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) @@ -987,7 +979,7 @@ def test_task_state_4a(plugin, tmp_path): def test_task_state_5(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter""" - nn = moment(name="NA").split( + nn = Moment(name="NA").split( splitter=("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] ) nn.cache_dir = tmp_path @@ -1013,7 +1005,7 @@ def test_task_state_5_exception(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter the shapes are not matching, so exception should be raised """ - nn = moment(name="NA").split( + nn = Moment(name="NA").split( splitter=("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]] ) nn.cache_dir = tmp_path @@ -1030,7 +1022,7 @@ def test_task_state_5_exception(plugin, tmp_path): def test_task_state_6(plugin, tmp_path): """ask with a list as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA").split( + nn = Moment(name="NA").split( splitter=["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] ) nn.cache_dir = tmp_path @@ -1054,7 +1046,7 @@ def test_task_state_6(plugin, tmp_path): def test_task_state_6a(plugin, tmp_path): """ask with a tuple as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA").split( + nn = Moment(name="NA").split( splitter=["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)] ) nn.cache_dir = tmp_path @@ -1079,7 +1071,7 @@ def test_task_state_6a(plugin, tmp_path): @pytest.mark.flaky(reruns=2) # when dask def test_task_state_comb_1(plugin_dask_opt, tmp_path): """task with the simplest splitter and combiner""" - nn = fun_addtwo(name="NA").split(a=[3, 5], splitter="a").combine(combiner="a") + nn = FunAddTwo(name="NA").split(a=[3, 5], splitter="a").combine(combiner="a") nn.cache_dir = tmp_path assert (nn.inputs.a == np.array([3, 5])).all() @@ -1216,7 +1208,7 @@ def test_task_state_comb_2( ): """Tasks with scalar and outer splitters and partial or full combiners""" nn = ( - fun_addvar(name="NA") + FunAddVar(name="NA") .split(a=[3, 5], b=[10, 20], splitter=splitter) .combine(combiner=combiner) ) @@ -1264,7 +1256,7 @@ def test_task_state_comb_singl_1(plugin, tmp_path): """Tasks with two inputs; one input is a single value, the other is in the splitter and combiner """ - nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10).combine(combiner="a") + nn = FunAddVar(name="NA").split(splitter="a", a=[3, 5], b=10).combine(combiner="a") nn.cache_dir = tmp_path assert nn.inputs.a == [3, 5] @@ -1292,7 +1284,7 @@ def test_task_state_comb_singl_1(plugin, tmp_path): def test_task_state_comb_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = fun_addtwo(name="NA").split(splitter="a", a=[]).combine(combiner=["a"]) + nn = FunAddTwo(name="NA").split(splitter="a", a=[]).combine(combiner=["a"]) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" @@ -1318,7 +1310,7 @@ def test_task_state_comb_order(): # single combiner "a" - will create two lists, first one for b=3, second for b=5 nn_a = ( - 
fun_addvar(name="NA") + FunAddVar(name="NA") .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) .combine(combiner="a") ) @@ -1330,7 +1322,7 @@ def test_task_state_comb_order(): # single combiner "b" - will create two lists, first one for a=10, second for a=20 nn_b = ( - fun_addvar(name="NA") + FunAddVar(name="NA") .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) .combine(combiner="b") ) @@ -1342,7 +1334,7 @@ def test_task_state_comb_order(): # combiner with both fields ["a", "b"] - will create one list nn_ab = ( - fun_addvar(name="NA") + FunAddVar(name="NA") .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) .combine(combiner=["a", "b"]) ) @@ -1356,7 +1348,7 @@ def test_task_state_comb_order(): # combiner with both fields ["b", "a"] - will create the same list as nn_ab # no difference in the order for setting combiner nn_ba = ( - fun_addvar(name="NA") + FunAddVar(name="NA") .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) .combine(combiner=["b", "a"]) ) @@ -1372,7 +1364,7 @@ def test_task_state_comb_order(): def test_task_state_contdim_1(tmp_path): """task with a spliter and container dimension for one of the value""" - task_4var = op_4var( + task_4var = Op4Var( name="op_4var", a="a1", cache_dir=tmp_path, @@ -1392,7 +1384,7 @@ def test_task_state_contdim_1(tmp_path): def test_task_state_contdim_2(tmp_path): """task with a splitter and container dimension for one of the value""" - task_4var = op_4var( + task_4var = Op4Var( name="op_4var", cache_dir=tmp_path, ) @@ -1412,7 +1404,7 @@ def test_task_state_contdim_2(tmp_path): def test_task_state_comb_contdim_1(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = op_4var( + task_4var = Op4Var( name="op_4var", a="a1", cache_dir=tmp_path, @@ -1432,7 +1424,7 @@ def test_task_state_comb_contdim_1(tmp_path): def test_task_state_comb_contdim_2(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = op_4var( + task_4var = Op4Var( name="op_4var", cache_dir=tmp_path, ) @@ -1458,7 +1450,7 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): """task with a state and provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) assert nn.state.splitter == "NA.a" assert (nn.inputs.a == np.array([3, 5])).all() @@ -1483,11 +1475,11 @@ def test_task_state_cachelocations(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = fun_addtwo( + nn2 = FunAddTwo( name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir ).split(splitter="a", a=[3, 5]) with Submitter(worker=plugin) as sub: @@ -1514,11 +1506,11 @@ def test_task_state_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = fun_addtwo( + nn2 = FunAddTwo( name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir ).split(splitter="a", a=[3, 5]) with 
Submitter(worker=plugin) as sub: @@ -1549,11 +1541,11 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = fun_addtwo(name="NA", cache_dir=cache_dir2, cache_locations=cache_dir).split( + nn2 = FunAddTwo(name="NA", cache_dir=cache_dir2, cache_locations=cache_dir).split( splitter="a", a=[3, 5] ) with Submitter(worker=plugin) as sub: @@ -1587,11 +1579,11 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): input2 = input_dir / "input2.txt" input2.write_text("test") - nn = fun_file(name="NA", filename=input1, cache_dir=cache_dir) + nn = FunFile(name="NA", filename=input1, cache_dir=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn) - nn2 = fun_file( + nn2 = FunFile( name="NA", filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir ) with Submitter(worker=plugin_dask_opt) as sub: diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index e0ecb93dc6..cec176deaf 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -8,7 +8,7 @@ from ..submitter import Submitter from pydra.design import python, workflow -from .utils import identity +from .utils import Identity from pydra.utils.hash import hash_function if importlib.util.find_spec("numpy") is None: @@ -81,7 +81,7 @@ def test_numpy_hash_3(): def test_task_numpyinput_1(tmp_path: Path): """task with numeric numpy array as an input""" - nn = identity(name="NA") + nn = Identity(name="NA") nn.cache_dir = tmp_path nn.split(x=[np.array([1, 2]), np.array([3, 4])]) # checking the results @@ -92,7 +92,7 @@ def test_task_numpyinput_1(tmp_path: Path): def test_task_numpyinput_2(tmp_path: Path): """task with numpy array of type object as an input""" - nn = identity(name="NA") + nn = Identity(name="NA") nn.cache_dir = tmp_path nn.split(x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)]) # checking the results diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 757c94d728..088ea22931 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -19,7 +19,7 @@ from pydra.utils.typing import StateArray # from ..helpers import make_klass -from .utils import foo +from .utils import Foo from pydra.design import python import pytest @@ -344,7 +344,7 @@ def test_input_file_hash_5(tmp_path): def test_lazy_field_cast(): - task = foo(a="a", b=1, c=2.0, name="foo") + task = Foo(a="a", b=1, c=2.0, name="foo") assert task.lzout.y.type == int assert task.lzout.y.cast(float).type == float diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 9150ad0087..909d1c0649 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -13,9 +13,9 @@ from .utils import ( need_sge, need_slurm, - gen_basic_wf, - gen_basic_wf_with_threadcount, - gen_basic_wf_with_threadcount_concurrent, + BasicWorkflow, + BasicWorkflowWithThreadCount, + BasicWorkflowWithThreadCountConcurrent, ) from ..core import Task from ..submitter import Submitter @@ -32,30 +32,30 @@ def sleep_add_one(x): def test_callable_wf(plugin, tmpdir): - wf = gen_basic_wf() + wf = BasicWorkflow() res = wf() assert res.output.out == 9 del wf, res # providing 
plugin - wf = gen_basic_wf() + wf = BasicWorkflow() res = wf(plugin="cf") assert res.output.out == 9 del wf, res # providing plugin_kwargs - wf = gen_basic_wf() + wf = BasicWorkflow() res = wf(plugin="cf", plugin_kwargs={"n_procs": 2}) assert res.output.out == 9 del wf, res # providing wrong plugin_kwargs - wf = gen_basic_wf() + wf = BasicWorkflow() with pytest.raises(TypeError, match="an unexpected keyword argument"): wf(plugin="cf", plugin_kwargs={"sbatch_args": "-N2"}) # providing submitter - wf = gen_basic_wf() + wf = BasicWorkflow() wf.cache_dir = tmpdir sub = Submitter(plugin) res = wf(submitter=sub) @@ -178,14 +178,14 @@ def test_wf_with_state(plugin_dask_opt, tmpdir): def test_serial_wf(): # Use serial plugin to execute workflow instead of CF - wf = gen_basic_wf() + wf = BasicWorkflow() res = wf(plugin="serial") assert res.output.out == 9 @need_slurm def test_slurm_wf(tmpdir): - wf = gen_basic_wf() + wf = BasicWorkflow() wf.cache_dir = tmpdir # submit workflow and every task as slurm job with Submitter("slurm") as sub: @@ -202,7 +202,7 @@ def test_slurm_wf(tmpdir): @need_slurm def test_slurm_wf_cf(tmpdir): # submit entire workflow as single job executing with cf worker - wf = gen_basic_wf() + wf = BasicWorkflow() wf.cache_dir = tmpdir wf.plugin = "cf" with Submitter("slurm") as sub: @@ -220,7 +220,7 @@ def test_slurm_wf_cf(tmpdir): @need_slurm def test_slurm_wf_state(tmpdir): - wf = gen_basic_wf() + wf = BasicWorkflow() wf.split("x", x=[5, 6]) wf.cache_dir = tmpdir with Submitter("slurm") as sub: @@ -395,7 +395,7 @@ def test_slurm_cancel_rerun_2(tmpdir): @need_sge def test_sge_wf(tmpdir): """testing that a basic workflow can be run with the SGEWorker""" - wf = gen_basic_wf() + wf = BasicWorkflow() wf.cache_dir = tmpdir # submit workflow and every task as sge job with Submitter( @@ -416,7 +416,7 @@ def test_sge_wf_cf(tmpdir): """testing the SGEWorker can submit SGE tasks while the workflow uses the concurrent futures plugin""" # submit entire workflow as single job executing with cf worker - wf = gen_basic_wf() + wf = BasicWorkflow() wf.cache_dir = tmpdir wf.plugin = "cf" with Submitter("sge") as sub: @@ -435,7 +435,7 @@ def test_sge_wf_cf(tmpdir): @need_sge def test_sge_wf_state(tmpdir): """testing the SGEWorker can be used with a workflow with state""" - wf = gen_basic_wf() + wf = BasicWorkflow() wf.split("x") wf.inputs.x = [5, 6] wf.cache_dir = tmpdir @@ -469,7 +469,7 @@ def qacct_output_to_dict(qacct_output): def test_sge_set_threadcount(tmpdir): """testing the number of threads for an SGEWorker task can be set using the input_spec variable sgeThreads""" - wf = gen_basic_wf_with_threadcount() + wf = BasicWorkflowWithThreadCount() wf.inputs.x = 5 wf.cache_dir = tmpdir @@ -499,7 +499,7 @@ def test_sge_set_threadcount(tmpdir): def test_sge_limit_maxthreads(tmpdir): """testing the ability to limit the number of threads used by the SGE at one time with the max_threads argument to SGEWorker""" - wf = gen_basic_wf_with_threadcount_concurrent() + wf = BasicWorkflowWithThreadCountConcurrent() wf.inputs.x = [5, 6] wf.split("x") wf.cache_dir = tmpdir @@ -543,7 +543,7 @@ def test_sge_limit_maxthreads(tmpdir): def test_sge_no_limit_maxthreads(tmpdir): """testing unlimited threads can be used at once by SGE when max_threads is not set""" - wf = gen_basic_wf_with_threadcount_concurrent() + wf = BasicWorkflowWithThreadCountConcurrent() wf.inputs.x = [5, 6] wf.split("x") wf.cache_dir = tmpdir diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 
339a7bc1e7..761b0fca57 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -11,7 +11,7 @@ from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages from ..task import AuditFlag, ShellTask from pydra.engine.specs import argstr_formatting -from .utils import gen_basic_wf +from .utils import BasicWorkflow from pydra.utils.typing import ( MultiInputObj, MultiOutputObj, @@ -1374,7 +1374,7 @@ def myhook(task, *args): # but can be shared across tasks bar.hooks = foo.hooks # and workflows - wf = gen_basic_wf() + wf = BasicWorkflow() wf.tmpdir = tmpdir wf.hooks = bar.hooks assert foo.hooks == bar.hooks == wf.hooks diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 09ed541c47..3cf124defa 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -5,32 +5,32 @@ import attr from pathlib import Path from .utils import ( - add2, - add2_wait, - multiply, - multiply_list, - multiply_mixed, - power, - ten, - identity, - identity_2flds, - list_output, - fun_addsubvar, - fun_addvar3, - fun_addvar, - fun_addtwo, - add2_sub2_res, - add2_sub2_res_list, - fun_addvar_none, - fun_addvar_default, - fun_addvar_default_notype, - fun_addvar_notype, - fun_addtwo_notype, - fun_write_file, - fun_write_file_list, - fun_write_file_list2dict, - list_sum, - list_mult_sum, + Add2, + Add2Wait, + Multiply, + MultiplyList, + MultiplyMixed, + Power, + Ten, + Identity, + Identity2Flds, + ListOutput, + FunAddSubVar, + FunAddVar3, + FunAddVar, + FunAddTwo, + Add2Sub2Res, + Add2Sub2ResList, + FunAddVarNone, + FunAddVarDefault, + FunAddVarDefaultNoType, + FunAddVarNoType, + FunAddTwoNoType, + FunWriteFile, + FunWriteFileList, + FunWriteFileList2Dict, + ListSum, + ListMultSum, DOT_FLAG, ) from ..submitter import Submitter @@ -85,7 +85,7 @@ def test_wf_dict_input_and_output_spec(): output_spec=definition, ) wf.add( - identity_2flds( + Identity2Flds( name="identity", x1=wf.lzin.a, x2=wf.lzin.b, @@ -130,16 +130,16 @@ def test_wf_name_conflict1(): def test_wf_name_conflict2(): """raise error when a task with the same name is already added to workflow""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="task_name", x=wf.lzin.x)) + wf.add(Add2(name="task_name", x=wf.lzin.x)) with pytest.raises(ValueError) as excinfo: - wf.add(identity(name="task_name", x=3)) + wf.add(Identity(name="task_name", x=3)) assert "Another task named task_name is already added" in str(excinfo.value) def test_wf_no_output(plugin, tmpdir): """Raise error when output isn't set with set_output""" wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.inputs.x = 2 with pytest.raises(ValueError) as excinfo: @@ -151,7 +151,7 @@ def test_wf_no_output(plugin, tmpdir): def test_wf_1(plugin, tmpdir): """workflow with one task and no splitter""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.cache_dir = tmpdir @@ -171,7 +171,7 @@ def test_wf_1a_outpastuple(plugin, tmpdir): set_output takes a tuple """ wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output(("out", wf.add2.lzout.out)) wf.inputs.x = 2 wf.plugin = plugin @@ -188,7 +188,7 @@ def test_wf_1a_outpastuple(plugin, tmpdir): def test_wf_1_call_subm(plugin, 
tmpdir): """using wf.__call_ with submitter""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.cache_dir = tmpdir @@ -204,7 +204,7 @@ def test_wf_1_call_subm(plugin, tmpdir): def test_wf_1_call_plug(plugin, tmpdir): """using wf.__call_ with plugin""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.plugin = plugin @@ -220,7 +220,7 @@ def test_wf_1_call_plug(plugin, tmpdir): def test_wf_1_call_noplug_nosubm(plugin, tmpdir): """using wf.__call_ without plugin or submitter""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.cache_dir = tmpdir @@ -234,7 +234,7 @@ def test_wf_1_call_noplug_nosubm(plugin, tmpdir): def test_wf_1_call_exception(plugin, tmpdir): """using wf.__call_ with plugin and submitter - should raise an exception""" wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.plugin = plugin @@ -249,7 +249,7 @@ def test_wf_1_call_exception(plugin, tmpdir): def test_wf_1_inp_in_call(tmpdir): """Defining input in __call__""" wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 1 results = wf(x=2) @@ -259,7 +259,7 @@ def test_wf_1_inp_in_call(tmpdir): def test_wf_1_upd_in_run(tmpdir): """Updating input in __call__""" wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 1 results = wf(x=2) @@ -269,8 +269,8 @@ def test_wf_1_upd_in_run(tmpdir): def test_wf_2(plugin, tmpdir): """workflow with 2 tasks, no splitter""" wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.inputs.y = 3 @@ -289,8 +289,8 @@ def test_wf_2a(plugin, tmpdir): creating add2_task first (before calling add method), """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + add2_task = Add2(name="add2") add2_task.inputs.x = wf.mult.lzout.out wf.add(add2_task) wf.set_output([("out", wf.add2.lzout.out)]) @@ -312,8 +312,8 @@ def test_wf_2b(plugin, tmpdir): adding inputs.x after add method """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + add2_task = Add2(name="add2") wf.add(add2_task) add2_task.inputs.x = wf.mult.lzout.out wf.set_output([("out", wf.add2.lzout.out)]) @@ -335,8 +335,8 @@ def test_wf_2c_multoutp(plugin, tmpdir): setting multiple outputs for the workflow """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) 
- wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + add2_task = Add2(name="add2") add2_task.inputs.x = wf.mult.lzout.out wf.add(add2_task) # setting multiple output (from both nodes) @@ -360,8 +360,8 @@ def test_wf_2d_outpasdict(plugin, tmpdir): setting multiple outputs using a dictionary """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = add2(name="add2") + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + add2_task = Add2(name="add2") add2_task.inputs.x = wf.mult.lzout.out wf.add(add2_task) # setting multiple output (from both nodes) @@ -384,8 +384,8 @@ def test_wf_2d_outpasdict(plugin, tmpdir): def test_wf_3(plugin_dask_opt, tmpdir): """testing None value for an input""" wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(fun_addvar_none(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) + wf.add(FunAddVarNone(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.inputs.y = None @@ -405,8 +405,8 @@ def test_wf_3a_exception(plugin, tmpdir): and the function should raise an exception """ wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(fun_addvar_none(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) + wf.add(FunAddVarNone(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.inputs.y = attr.NOTHING @@ -422,8 +422,8 @@ def test_wf_3a_exception(plugin, tmpdir): def test_wf_4(plugin, tmpdir): """wf with a task that doesn't set one input and use the function default value""" wf = Workflow(name="wf_4", input_spec=["x", "y"]) - wf.add(fun_addvar_default(name="addvar", a=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) + wf.add(FunAddVarDefault(name="addvar", a=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.cache_dir = tmpdir @@ -442,8 +442,8 @@ def test_wf_4a(plugin, tmpdir): so the task should use the function default value """ wf = Workflow(name="wf_4a", input_spec=["x", "y"]) - wf.add(fun_addvar_default(name="addvar", a=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.addvar.lzout.out)) + wf.add(FunAddVarDefault(name="addvar", a=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.cache_dir = tmpdir @@ -461,7 +461,7 @@ def test_wf_5(plugin, tmpdir): one set_output """ wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) + wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) wf.set_output([("out_sum", wf.addsub.lzout.sum), ("out_sub", wf.addsub.lzout.sub)]) wf.cache_dir = tmpdir @@ -478,7 +478,7 @@ def test_wf_5a(plugin, tmpdir): set_output set twice """ wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) + wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) wf.set_output([("out_sum", wf.addsub.lzout.sum)]) wf.set_output([("out_sub", wf.addsub.lzout.sub)]) wf.cache_dir = tmpdir @@ -494,7 +494,7 @@ def test_wf_5a(plugin, tmpdir): def test_wf_5b_exception(tmpdir): 
"""set_output used twice with the same name - exception should be raised""" wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(fun_addsubvar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) + wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) wf.set_output([("out", wf.addsub.lzout.sum)]) wf.cache_dir = tmpdir @@ -507,8 +507,8 @@ def test_wf_6(plugin, tmpdir): one set_output """ wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.set_output([("out1", wf.mult.lzout.out), ("out2", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -526,8 +526,8 @@ def test_wf_6a(plugin, tmpdir): set_output used twice """ wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.set_output([("out1", wf.mult.lzout.out)]) wf.set_output([("out2", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -544,7 +544,7 @@ def test_wf_6a(plugin, tmpdir): def test_wf_st_1(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -568,7 +568,7 @@ def test_wf_st_1(plugin, tmpdir): def test_wf_st_1_call_subm(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -592,7 +592,7 @@ def test_wf_st_1_call_plug(plugin, tmpdir): using Workflow.__call__(plugin) """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -615,7 +615,7 @@ def test_wf_st_1_call_selfplug(plugin, tmpdir): using Workflow.__call__() and using self.plugin """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -639,7 +639,7 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): (a submitter should be created within the __call__ function) """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -661,7 +661,7 @@ def test_wf_st_1_inp_in_call(tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( "x", x=[1, 2] ) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) results = wf() assert results[0].output.out == 3 @@ -673,7 +673,7 @@ def test_wf_st_1_upd_inp_call(tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( "x", x=[11, 22] ) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) results = wf(x=[1, 2]) assert results[0].output.out == 3 @@ 
-683,7 +683,7 @@ def test_wf_st_1_upd_inp_call(tmpdir): def test_wf_st_noinput_1(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -704,7 +704,7 @@ def test_wf_st_noinput_1(plugin, tmpdir): def test_wf_ndst_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) wf.inputs.x = [1, 2] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -725,7 +725,7 @@ def test_wf_ndst_updatespl_1(plugin, tmpdir): a splitter on the task level is added *after* calling add """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2")) + wf.add(Add2(name="add2")) wf.inputs.x = [1, 2] wf.add2.split("x", x=wf.lzin.x) wf.set_output([("out", wf.add2.lzout.out)]) @@ -747,7 +747,7 @@ def test_wf_ndst_updatespl_1a(plugin, tmpdir): a splitter on the task level is added *after* calling add """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - task_add2 = add2(name="add2", x=wf.lzin.x) + task_add2 = Add2(name="add2", x=wf.lzin.x) wf.add(task_add2) task_add2.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -770,7 +770,7 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): updating input of the task after calling add """ wf = Workflow(name="wf_spl_1", input_spec=["x", "y"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.add2.split("x", x=wf.lzin.y) @@ -790,7 +790,7 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): def test_wf_ndst_noinput_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) wf.inputs.x = [] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -809,7 +809,7 @@ def test_wf_ndst_noinput_1(plugin, tmpdir): def test_wf_st_2(plugin, tmpdir): """workflow with one task, splitters and combiner for workflow""" wf = Workflow(name="wf_st_2", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.lzin.x)) wf.split("x", x=[1, 2]).combine(combiner="x") wf.set_output([("out", wf.add2.lzout.out)]) @@ -831,7 +831,7 @@ def test_wf_st_2(plugin, tmpdir): def test_wf_ndst_2(plugin, tmpdir): """workflow with one task, splitters and combiner on the task level""" wf = Workflow(name="wf_ndst_2", input_spec=["x"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x).combine(combiner="x")) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x).combine(combiner="x")) wf.inputs.x = [1, 2] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -851,8 +851,8 @@ def test_wf_ndst_2(plugin, tmpdir): def test_wf_st_3(plugin, tmpdir): """workflow with 2 tasks, splitter on wf level""" wf = Workflow(name="wfst_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.split(("x", "y"), x=[1, 2], y=[11, 12]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -896,8 +896,8 @@ def 
test_wf_st_3(plugin, tmpdir): def test_wf_ndst_3(plugin, tmpdir): """Test workflow with 2 tasks, splitter on a task level""" wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.add2.lzout.out)]) @@ -916,8 +916,8 @@ def test_wf_ndst_3(plugin, tmpdir): def test_wf_st_4(plugin, tmpdir): """workflow with two tasks, scalar splitter and combiner for the workflow""" wf = Workflow(name="wf_st_4", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.split(("x", "y"), x=[1, 2], y=[11, 12]) wf.combine("x") @@ -942,8 +942,8 @@ def test_wf_st_4(plugin, tmpdir): def test_wf_ndst_4(plugin, tmpdir): """workflow with two tasks, scalar splitter and combiner on tasks level""" wf = Workflow(name="wf_ndst_4", input_spec=["a", "b"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.a, y=wf.lzin.b)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) + wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.a, y=wf.lzin.b)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -965,8 +965,8 @@ def test_wf_ndst_4(plugin, tmpdir): def test_wf_st_5(plugin, tmpdir): """workflow with two tasks, outer splitter and no combiner""" wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.split(["x", "y"], x=[1, 2], y=[11, 12]) wf.set_output([("out", wf.add2.lzout.out)]) @@ -989,8 +989,8 @@ def test_wf_st_5(plugin, tmpdir): def test_wf_ndst_5(plugin, tmpdir): """workflow with two tasks, outer splitter on tasks level and no combiner""" wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.add2.lzout.out)]) @@ -1011,8 +1011,8 @@ def test_wf_ndst_5(plugin, tmpdir): def test_wf_st_6(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner for the workflow""" wf = Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) wf.combine("x") @@ -1038,8 +1038,8 @@ def test_wf_st_6(plugin, tmpdir): def test_wf_ndst_6(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner on tasks level""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) + wf.add(Multiply(name="mult").split(["x", 
"y"], x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] wf.set_output([("out", wf.add2.lzout.out)]) @@ -1059,8 +1059,8 @@ def test_wf_ndst_6(plugin, tmpdir): def test_wf_ndst_7(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first node only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split("x", x=wf.lzin.x, y=wf.lzin.y).combine("x")) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult").split("x", x=wf.lzin.x, y=wf.lzin.y).combine("x")) + wf.add(Identity(name="iden", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = 11 wf.set_output([("out", wf.iden.lzout.out)]) @@ -1080,9 +1080,9 @@ def test_wf_ndst_8(plugin, tmpdir): """workflow with two tasks, outer splitter and (partial) combiner for first task only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") + Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) + wf.add(Identity(name="iden", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] wf.set_output([("out", wf.iden.lzout.out)]) @@ -1103,11 +1103,11 @@ def test_wf_ndst_9(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first task only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) wf.add( - multiply(name="mult") + Multiply(name="mult") .split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y) .combine(["x", "y"]) ) - wf.add(identity(name="iden", x=wf.mult.lzout.out)) + wf.add(Identity(name="iden", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] wf.set_output([("out", wf.iden.lzout.out)]) @@ -1129,9 +1129,9 @@ def test_wf_ndst_9(plugin, tmpdir): def test_wf_3sernd_ndst_1(plugin, tmpdir): """workflow with three "serial" tasks, checking if the splitter is propagating""" wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2_1st", x=wf.mult.lzout.out)) - wf.add(add2(name="add2_2nd", x=wf.add2_1st.lzout.out)) + wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2_1st", x=wf.mult.lzout.out)) + wf.add(Add2(name="add2_2nd", x=wf.add2_1st.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.add2_2nd.lzout.out)]) @@ -1168,9 +1168,9 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): and the 2nd task is adding one more input to the splitter """ wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(add2(name="add2_1st").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.add2_1st.lzout.out).split("y", y=wf.lzin.y)) - wf.add(add2(name="add2_2nd", x=wf.mult.lzout.out)) + wf.add(Add2(name="add2_1st").split("x", x=wf.lzin.x)) + wf.add(Multiply(name="mult", x=wf.add2_1st.lzout.out).split("y", y=wf.lzin.y)) + wf.add(Add2(name="add2_2nd", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.add2_2nd.lzout.out)]) @@ -1208,9 +1208,9 @@ def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): splitter on the workflow level """ wf = Workflow(name="wf_st_7", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, 
y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) wf.set_output([("out", wf.mult.lzout.out)]) @@ -1236,9 +1236,9 @@ def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): splitter on the tasks levels """ wf = Workflow(name="wf_ndst_7", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] wf.set_output([("out", wf.mult.lzout.out)]) @@ -1259,9 +1259,9 @@ def test_wf_3nd_st_2(plugin, tmpdir): splitter and partial combiner on the workflow level """ wf = Workflow(name="wf_st_8", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") wf.set_output([("out", wf.mult.lzout.out)]) @@ -1289,10 +1289,10 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): splitter and partial combiner on the tasks levels """ wf = Workflow(name="wf_ndst_8", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( + Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( "add2x.x" ) ) @@ -1317,9 +1317,9 @@ def test_wf_3nd_st_3(plugin, tmpdir): splitter and partial combiner (from the second task) on the workflow level """ wf = Workflow(name="wf_st_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") wf.set_output([("out", wf.mult.lzout.out)]) @@ -1347,10 +1347,10 @@ def test_wf_3nd_ndst_3(plugin, tmpdir): splitter and partial combiner (from the second task) on the tasks levels """ wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( + Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( "add2y.x" ) ) @@ -1376,9 +1376,9 @@ def test_wf_3nd_st_4(plugin, tmpdir): splitter and full combiner on the workflow level """ wf = Workflow(name="wf_st_10", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, 
y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) wf.set_output([("out", wf.mult.lzout.out)]) wf.plugin = plugin @@ -1406,10 +1406,10 @@ def test_wf_3nd_ndst_4(plugin, tmpdir): splitter and full combiner on the tasks levels """ wf = Workflow(name="wf_ndst_10", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( + Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( ["add2x.x", "add2y.x"] ) ) @@ -1434,10 +1434,10 @@ def test_wf_3nd_st_5(plugin, tmpdir): splitter and partial combiner (from the second task) on the workflow level """ wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) wf.add( - fun_addvar3( + FunAddVar3( name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out, c=wf.lzin.z ) ) @@ -1472,10 +1472,10 @@ def test_wf_3nd_ndst_5(plugin, tmpdir): all tasks have splitters and the last one has a partial combiner (from the 2nd) """ wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - fun_addvar3(name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out) + FunAddVar3(name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out) .split("c", c=wf.lzin.z) .combine("add2x.x") ) @@ -1505,10 +1505,10 @@ def test_wf_3nd_ndst_6(plugin, tmpdir): the third one uses scalar splitter from the previous ones and a combiner """ wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out) + Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out) .split(("_add2x", "_add2y")) .combine("add2y.x") ) @@ -1531,10 +1531,10 @@ def test_wf_3nd_ndst_7(plugin, tmpdir): the third one uses scalar splitter from the previous ones """ wf = Workflow(name="wf_ndst_9", input_spec=["x"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y").split("x", x=wf.lzin.x)) wf.add( - multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).split( + Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).split( ("_add2x", "_add2y") ) ) @@ -1559,18 +1559,18 @@ def test_wf_3nd_8(tmpdir): wf = Workflow(name="wf", input_spec=["zip"], cache_dir=tmpdir) wf.inputs.zip = [["test1", "test3", "test5"], ["test2", "test4", "test6"]] - wf.add(identity_2flds(name="iden2flds_1", x2="Hoi").split("x1", x1=wf.lzin.zip)) + wf.add(Identity2Flds(name="iden2flds_1", x2="Hoi").split("x1", x1=wf.lzin.zip)) - wf.add(identity(name="identity", 
x=wf.iden2flds_1.lzout.out1)) + wf.add(Identity(name="identity", x=wf.iden2flds_1.lzout.out1)) wf.add( - identity_2flds( + Identity2Flds( name="iden2flds_2", x1=wf.identity.lzout.out, x2=wf.iden2flds_1.lzout.out2 ) ) wf.add( - identity_2flds( + Identity2Flds( name="iden2flds_2a", x1=wf.iden2flds_1.lzout.out1, x2=wf.iden2flds_1.lzout.out2, @@ -1608,8 +1608,8 @@ def test_wf_ndstLR_1(plugin, tmpdir): and the Left part from the first task should be added """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.add2.lzout.out).split("y", y=wf.lzin.y)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Multiply(name="mult", x=wf.add2.lzout.out).split("y", y=wf.lzin.y)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.mult.lzout.out)]) @@ -1636,9 +1636,9 @@ def test_wf_ndstLR_1a(plugin, tmpdir): and the Right part (it's own splitter) """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - multiply(name="mult").split(["_add2", "y"], x=wf.add2.lzout.out, y=wf.lzin.y) + Multiply(name="mult").split(["_add2", "y"], x=wf.add2.lzout.out, y=wf.lzin.y) ) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] @@ -1666,9 +1666,9 @@ def test_wf_ndstLR_2(plugin, tmpdir): and the Left part from the first task should be added """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( + FunAddVar3(name="addvar", a=wf.add2.lzout.out).split( ["b", "c"], b=wf.lzin.y, c=wf.lzin.z ) ) @@ -1715,9 +1715,9 @@ def test_wf_ndstLR_2a(plugin, tmpdir): and the Right part (it's own outer splitter) """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( + FunAddVar3(name="addvar", a=wf.add2.lzout.out).split( ["_add2", ["b", "c"]], b=wf.lzin.y, c=wf.lzin.z ) ) @@ -1766,8 +1766,8 @@ def test_wf_ndstinner_1(plugin, tmpdir): the second task has inner splitter """ wf = Workflow(name="wf_st_3", input_spec={"x": int}) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(add2(name="add2").split("x", x=wf.list.lzout.out)) + wf.add(ListOutput(name="list", x=wf.lzin.x)) + wf.add(Add2(name="add2").split("x", x=wf.list.lzout.out)) wf.inputs.x = 1 wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -1790,8 +1790,8 @@ def test_wf_ndstinner_2(plugin, tmpdir): the second task has two inputs and inner splitter from one of the input """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) + wf.add(ListOutput(name="list", x=wf.lzin.x)) + wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) wf.inputs.x = 1 wf.inputs.y = 10 wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) @@ -1815,8 +1815,8 @@ def test_wf_ndstinner_3(plugin, tmpdir): the second task has two inputs and outer splitter that includes an inner field """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult").split(["x", 
"y"], x=wf.list.lzout.out, y=wf.lzin.y)) + wf.add(ListOutput(name="list", x=wf.lzin.x)) + wf.add(Multiply(name="mult").split(["x", "y"], x=wf.list.lzout.out, y=wf.lzin.y)) wf.inputs.x = 1 wf.inputs.y = [10, 100] wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) @@ -1841,9 +1841,9 @@ def test_wf_ndstinner_4(plugin, tmpdir): the third task has no its own splitter """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(ListOutput(name="list", x=wf.lzin.x)) + wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = 1 wf.inputs.y = 10 wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) @@ -1872,9 +1872,9 @@ def test_wf_ndstinner_5(plugin, tmpdir): the third task has no new splitter """ wf = Workflow(name="wf_5", input_spec=["x", "y", "b"]) - wf.add(list_output(name="list").split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult").split(["y", "x"], x=wf.list.lzout.out, y=wf.lzin.y)) - wf.add(fun_addvar(name="addvar", a=wf.mult.lzout.out).split("b", b=wf.lzin.b)) + wf.add(ListOutput(name="list").split("x", x=wf.lzin.x)) + wf.add(Multiply(name="mult").split(["y", "x"], x=wf.list.lzout.out, y=wf.lzin.y)) + wf.add(FunAddVar(name="addvar", a=wf.mult.lzout.out).split("b", b=wf.lzin.b)) wf.inputs.x = [1, 2] wf.inputs.y = [10, 100] wf.inputs.b = [3, 5] @@ -1956,8 +1956,8 @@ def test_wf_ndstinner_5(plugin, tmpdir): def test_wf_st_singl_1(plugin, tmpdir): """workflow with two tasks, only one input is in the splitter and combiner""" wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.split("x", x=[1, 2], y=11) wf.combine("x") @@ -1981,8 +1981,8 @@ def test_wf_ndst_singl_1(plugin, tmpdir): only one input is part of the splitter, the other is a single value """ wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) + wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.inputs.x = [1, 2] wf.inputs.y = 11 wf.set_output([("out", wf.add2.lzout.out)]) @@ -2003,9 +2003,9 @@ def test_wf_st_singl_2(plugin, tmpdir): only one input is part of the splitter, the other is a single value """ wf = Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.split("x", x=[1, 2, 3], y=11) wf.set_output([("out", wf.mult.lzout.out)]) @@ -2031,9 +2031,9 @@ def test_wf_ndst_singl_2(plugin, tmpdir): only one input is part of the splitter, the other is a single value """ wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(add2(name="add2y", x=wf.lzin.y)) - wf.add(multiply(name="mult", x=wf.add2x.lzout.out, 
y=wf.add2y.lzout.out)) + wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2y", x=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = 11 wf.set_output([("out", wf.mult.lzout.out)]) @@ -2057,7 +2057,7 @@ def test_wfasnd_1(plugin, tmpdir): workflow-node with one task and no splitter """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wfnd.inputs.x = 2 @@ -2082,7 +2082,7 @@ def test_wfasnd_wfinp_1(plugin, tmpdir): """ wf = Workflow(name="wf", input_spec=["x"]) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) @@ -2108,7 +2108,7 @@ def test_wfasnd_wfndupdate(plugin, tmpdir): """ wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf = Workflow(name="wf", input_spec=["x"], x=3) @@ -2133,7 +2133,7 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): """ wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wfnd.cache_dir = tmpdir with Submitter(worker=plugin) as sub: @@ -2176,7 +2176,7 @@ def test_wfasnd_st_1(plugin, tmpdir): splitter for wfnd """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wfnd.split("x", x=[2, 4]) @@ -2202,7 +2202,7 @@ def test_wfasnd_st_updatespl_1(plugin, tmpdir): splitter for wfnd is set after add """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf = Workflow(name="wf", input_spec=["x"]) @@ -2226,7 +2226,7 @@ def test_wfasnd_ndst_1(plugin, tmpdir): splitter for node """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2").split("x", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2").split("x", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) # TODO: without this the test is failing wfnd.plugin = plugin @@ -2252,7 +2252,7 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): splitter for node added after add """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.add2.split("x", x=[2, 4]) wfnd.set_output([("out", wfnd.add2.lzout.out)]) @@ -2277,7 +2277,7 @@ def test_wfasnd_wfst_1(plugin, tmpdir): """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) @@ -2305,13 +2305,13 @@ def test_wfasnd_st_2(plugin, tmpdir): splitter for wfnd """ wfnd = Workflow(name="wfnd", input_spec=["x", "y"]) - wfnd.add(multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) + wfnd.add(Multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) wfnd.set_output([("out", wfnd.mult.lzout.out)]) wfnd.split(("x", "y"), x=[2, 4], y=[1, 10]) wf = 
Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(wfnd) - wf.add(add2(name="add2", x=wf.wfnd.lzout.out)) + wf.add(Add2(name="add2", x=wf.wfnd.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -2331,11 +2331,11 @@ def test_wfasnd_wfst_2(plugin, tmpdir): """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wfnd = Workflow(name="wfnd", input_spec=["x", "y"], x=wf.lzin.x, y=wf.lzin.y) - wfnd.add(multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) + wfnd.add(Multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) wfnd.set_output([("out", wfnd.mult.lzout.out)]) wf.add(wfnd) - wf.add(add2(name="add2", x=wf.wfnd.lzout.out)) + wf.add(Add2(name="add2", x=wf.wfnd.lzout.out)) wf.split(("x", "y"), x=[2, 4], y=[1, 10]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -2361,12 +2361,12 @@ def test_wfasnd_ndst_3(plugin, tmpdir): splitter for the first task """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) wf.inputs.x = [2, 4] wf.inputs.y = [1, 10] wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) @@ -2388,11 +2388,11 @@ def test_wfasnd_wfst_3(plugin, tmpdir): splitter for the main workflow """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) wf.split(("x", "y"), x=[2, 4], y=[1, 10]) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) @@ -2420,8 +2420,8 @@ def test_wfasnd_4(plugin, tmpdir): workflow-node with two tasks and no splitter """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) + wfnd.add(Add2(name="add2_1st", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) wfnd.inputs.x = 2 @@ -2445,8 +2445,8 @@ def test_wfasnd_ndst_4(plugin, tmpdir): splitter for node """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2_1st").split("x", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) + wfnd.add(Add2(name="add2_1st").split("x", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) wfnd.inputs.x = [2, 4] @@ -2471,8 +2471,8 @@ def test_wfasnd_wfst_4(plugin, tmpdir): """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) + wfnd.add(Add2(name="add2_1st", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) wf.add(wfnd) @@ -2500,8 +2500,8 @@ def test_wf_nostate_cachedir(plugin, tmpdir): cache_dir = tmpdir.mkdir("test_wf_cache_1") wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", 
x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.inputs.y = 3 @@ -2524,8 +2524,8 @@ def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): tmpdir.mkdir(cache_dir) wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2(name="add2", x=wf.mult.lzout.out)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.inputs.x = 2 wf.inputs.y = 3 @@ -2550,8 +2550,8 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2570,8 +2570,8 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2606,8 +2606,8 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2627,8 +2627,8 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2666,8 +2666,8 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2687,8 +2687,8 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) # additional output wf2.set_output([("out_pr", wf2.add2.lzout.out)]) @@ -2727,8 
+2727,8 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out1", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2748,8 +2748,8 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out2", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2784,8 +2784,8 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out1", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2805,8 +2805,8 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out2", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2842,8 +2842,8 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2863,8 +2863,8 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2900,8 +2900,8 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2922,8 +2922,8 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): 
cache_locations=cache_dir1, rerun=True, # wh has to be rerun (default for propagate_rerun is True) ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -2963,8 +2963,8 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -2986,8 +2986,8 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): rerun=True, # wh has to be rerun propagate_rerun=False, # but rerun doesn't propagate to the tasks ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -3027,8 +3027,8 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -3050,9 +3050,9 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd rerun=True, propagate_rerun=False, # rerun will not be propagated to each task ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) # rerun on the task level needed (wf.propagate_rerun is False) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out, rerun=True)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out, rerun=True)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -3090,8 +3090,8 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(ten(name="ten", x=wf1.lzin.x)) - wf1.add(add2(name="add2", x=wf1.ten.lzout.out)) + wf1.add(Ten(name="ten", x=wf1.lzin.x)) + wf1.add(Add2(name="add2", x=wf1.ten.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 3 wf1.plugin = plugin @@ -3108,8 +3108,8 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(ten(name="ten", x=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.ten.lzout.out)) + wf2.add(Ten(name="ten", x=wf2.lzin.x)) + wf2.add(Add2(name="add2", x=wf2.ten.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.plugin = plugin @@ -3139,8 +3139,8 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = 
Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(ten(name="ten", x=wf1.lzin.x)) - wf1.add(add2(name="add2", x=wf1.ten.lzout.out)) + wf1.add(Ten(name="ten", x=wf1.lzin.x)) + wf1.add(Add2(name="add2", x=wf1.ten.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 3 wf1.plugin = plugin @@ -3152,8 +3152,8 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): assert 12 == results1.output.out wf2 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir2) - wf2.add(ten(name="ten", x=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.ten.lzout.out)) + wf2.add(Ten(name="ten", x=wf2.lzin.x)) + wf2.add(Add2(name="add2", x=wf2.ten.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.plugin = plugin @@ -3184,8 +3184,8 @@ def test_wf_state_cachelocations(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin @@ -3205,8 +3205,8 @@ def test_wf_state_cachelocations(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin @@ -3248,8 +3248,8 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin @@ -3269,8 +3269,8 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin @@ -3313,8 +3313,8 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin @@ -3334,8 +3334,8 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, 
y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin @@ -3377,8 +3377,8 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -3396,8 +3396,8 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin @@ -3428,8 +3428,8 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -3449,8 +3449,8 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -3487,8 +3487,8 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): cache_dir2 = tmpdir.mkdir("test_wf_cache4") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -3507,8 +3507,8 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): cache_locations=cache_dir1, ) # different argument assignment - wf2.add(multiply(name="mult", x=wf2.lzin.y, y=wf2.lzin.x)) - wf2.add(add2(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Multiply(name="mult", x=wf2.lzin.y, y=wf2.lzin.x)) + wf2.add(Add2(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = 2 wf2.inputs.y = 3 @@ -3540,9 +3540,9 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) - 
wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = [2, 20] wf1.inputs.y = [3, 4] @@ -3563,9 +3563,9 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = [2, 20] wf2.inputs.y = [3, 4] @@ -3605,9 +3605,9 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = [2, 20] wf1.inputs.y = [3, 4] @@ -3628,9 +3628,9 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = [2, 20] wf2.inputs.y = [3, 4] @@ -3668,9 +3668,9 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = [2, 20] wf1.inputs.y = [3, 4] @@ -3690,9 +3690,9 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult")) + wf2.add(Multiply(name="mult")) wf2.mult.split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = [2, 20] wf2.inputs.y = [3, 4] @@ -3731,9 +3731,9 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) + Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = [2, 20] wf1.inputs.y = [3, 4] @@ -3754,9 +3754,9 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult").split(splitter=["x", "y"], x=wf2.lzin.x, y=wf2.lzin.y) + Multiply(name="mult").split(splitter=["x", "y"], x=wf2.lzin.x, y=wf2.lzin.y) ) - wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) + 
wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = [2, 20] wf2.inputs.y = [3, 4] @@ -3793,8 +3793,8 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.inputs.x = 2 wf1.inputs.y = 3 @@ -3839,8 +3839,8 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) + wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) + wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) wf1.plugin = plugin @@ -3882,8 +3882,8 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): def create_tasks(): wf = Workflow(name="wf", input_spec=["x"]) wf.inputs.x = 1 - wf.add(add2(name="t1", x=wf.lzin.x)) - wf.add(multiply(name="t2", x=wf.t1.lzout.out, y=2)) + wf.add(Add2(name="t1", x=wf.lzin.x)) + wf.add(Multiply(name="t2", x=wf.t1.lzout.out, y=2)) wf.set_output([("out", wf.t2.lzout.out)]) t1 = wf.name2obj["t1"] t2 = wf.name2obj["t2"] @@ -3920,9 +3920,9 @@ def test_cache_propagation3(tmpdir, create_tasks): def test_workflow_combine1(tmpdir): wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add(power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b)) - wf1.add(identity(name="identity1", x=wf1.power.lzout.out).combine("power.a")) - wf1.add(identity(name="identity2", x=wf1.identity1.lzout.out).combine("power.b")) + wf1.add(Power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b)) + wf1.add(Identity(name="identity1", x=wf1.power.lzout.out).combine("power.a")) + wf1.add(Identity(name="identity2", x=wf1.identity1.lzout.out).combine("power.b")) wf1.set_output( { "out_pow": wf1.power.lzout.out, @@ -3941,9 +3941,9 @@ def test_workflow_combine1(tmpdir): def test_workflow_combine2(tmpdir): wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) wf1.add( - power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b).combine("a") + Power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b).combine("a") ) - wf1.add(identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) + wf1.add(Identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) wf1.set_output({"out_pow": wf1.power.lzout.out, "out_iden": wf1.identity.lzout.out}) wf1.cache_dir = tmpdir outputs = wf1() @@ -3961,8 +3961,8 @@ def test_wf_lzoutall_1(plugin, tmpdir): by using lzout.all syntax """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out", wf.add_sub.lzout.out_add)]) wf.inputs.x = 2 wf.inputs.y = 3 @@ -3982,8 +3982,8 @@ def test_wf_lzoutall_1a(plugin, tmpdir): by using lzout.all syntax in the node connections and for wf output """ wf 
= Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_all", wf.add_sub.lzout.all_)]) wf.inputs.x = 2 wf.inputs.y = 3 @@ -4003,8 +4003,8 @@ def test_wf_lzoutall_st_1(plugin, tmpdir): by using lzout.all syntax """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4025,8 +4025,8 @@ def test_wf_lzoutall_st_1a(plugin, tmpdir): by using lzout.all syntax """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_all", wf.add_sub.lzout.all_)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4053,9 +4053,9 @@ def test_wf_lzoutall_st_2(plugin, tmpdir): """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") + Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) - wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Add2Sub2ResList(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4085,9 +4085,9 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) wf.add( - multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") + Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) - wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(Add2Sub2ResList(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_all", wf.add_sub.lzout.all_)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4111,7 +4111,7 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): def test_wf_resultfile_1(plugin, tmpdir): """workflow with a file in the result, file should be copied to the wf dir""" wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_write_file(name="writefile", filename=wf.lzin.x)) + wf.add(FunWriteFile(name="writefile", filename=wf.lzin.x)) wf.inputs.x = "file_1.txt" wf.plugin = plugin wf.set_output([("wf_out", wf.writefile.lzout.out)]) @@ -4131,7 +4131,7 @@ def test_wf_resultfile_2(plugin, tmpdir): all files should be copied to the wf dir """ wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_write_file_list(name="writefile", filename_list=wf.lzin.x)) + wf.add(FunWriteFileList(name="writefile", filename_list=wf.lzin.x)) file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] wf.inputs.x = file_list wf.plugin = plugin @@ -4152,7 +4152,7 @@ def test_wf_resultfile_3(plugin, tmpdir): all files should be copied to the wf dir """ wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - 
wf.add(fun_write_file_list2dict(name="writefile", filename_list=wf.lzin.x)) + wf.add(FunWriteFileList2Dict(name="writefile", filename_list=wf.lzin.x)) file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] wf.inputs.x = file_list wf.plugin = plugin @@ -4175,10 +4175,10 @@ def test_wf_resultfile_3(plugin, tmpdir): def test_wf_upstream_error1(plugin, tmpdir): """workflow with two tasks, task2 dependent on an task1 which raised an error""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4193,10 +4193,10 @@ def test_wf_upstream_error2(plugin, tmpdir): goal - workflow finish running, one output errors but the other doesn't """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.split("x", x=[1, "hi"]) # workflow-level split TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: @@ -4212,11 +4212,11 @@ def test_wf_upstream_error3(plugin, tmpdir): goal - workflow finish running, one output errors but the other doesn't """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1")) + wf.add(FunAddVarDefaultNoType(name="addvar1")) wf.inputs.x = [1, "hi"] # TypeError for adding str and int wf.addvar1.split("a", a=wf.lzin.x) # task-level split wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: @@ -4229,7 +4229,7 @@ def test_wf_upstream_error3(plugin, tmpdir): def test_wf_upstream_error4(plugin, tmpdir): """workflow with one task, which raises an error""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin wf.set_output([("out", wf.addvar1.lzout.out)]) @@ -4245,7 +4245,7 @@ def test_wf_upstream_error5(plugin, tmpdir): """nested workflow with one task, which raises an error""" wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.plugin = plugin wf.set_output([("wf_out", wf.addvar1.lzout.out)]) @@ -4265,8 +4265,8 @@ def test_wf_upstream_error6(plugin, tmpdir): """nested workflow with two tasks, the first one raises an error""" wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) - 
wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) wf.plugin = plugin wf.set_output([("wf_out", wf.addvar2.lzout.out)]) @@ -4288,11 +4288,11 @@ def test_wf_upstream_error7(plugin, tmpdir): the last task is set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4310,11 +4310,11 @@ def test_wf_upstream_error7a(plugin, tmpdir): the second task is set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4332,11 +4332,11 @@ def test_wf_upstream_error7b(plugin, tmpdir): the second and the third tasks are set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4351,11 +4351,11 @@ def test_wf_upstream_error7b(plugin, tmpdir): def test_wf_upstream_error8(plugin, tmpdir): """workflow with three tasks, the first one raises an error, so 2 others are removed""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(FunAddTwo(name="addtwo", a=wf.addvar1.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addtwo.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4375,13 +4375,13 @@ def test_wf_upstream_error9(plugin, tmpdir): 
the errored branch is connected to the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) + wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(FunAddVarDefaultNoType(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) + wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.follow_err.lzout.out)]) wf.plugin = plugin @@ -4402,13 +4402,13 @@ def test_wf_upstream_error9a(plugin, tmpdir): so the workflow finished clean """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefault(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default(name="follow_err", a=wf.err.lzout.out)) + wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(FunAddVarDefault(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar2", a=wf.addtwo.lzout.out)) + wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefault(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out)]) # , ("out2", wf.addtwo.lzout.out)]) wf.plugin = plugin @@ -4425,13 +4425,13 @@ def test_wf_upstream_error9b(plugin, tmpdir): both branches are connected to the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) + wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(FunAddVarDefaultNoType(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) + wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.follow_err.lzout.out), ("out2", wf.addtwo.lzout.out)]) wf.plugin = plugin @@ -4474,9 +4474,9 @@ def exporting_graphs(wf, name): def test_graph_1(tmpdir, splitter): """creating a set of graphs, wf with two nodes""" wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult_1", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) + wf.add(Multiply(name="mult_1", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.mult_1.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) wf.split(splitter, x=[1, 2]) @@ -4516,9 +4516,9 @@ def test_graph_1st(tmpdir): some nodes have splitters, should be marked 
with blue color """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult_1", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) + wf.add(Multiply(name="mult_1", y=wf.lzin.y).split("x", x=wf.lzin.x)) + wf.add(Multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.mult_1.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) # simple graph @@ -4557,9 +4557,9 @@ def test_graph_1st_cmb(tmpdir): first two nodes should be blue and the arrow between them should be blue """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.add(list_sum(name="sum", x=wf.add2.lzout.out)) + wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) + wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) + wf.add(ListSum(name="sum", x=wf.add2.lzout.out)) wf.set_output([("out", wf.sum.lzout.out)]) # simple graph dotfile_s = wf.create_dotfile() @@ -4597,7 +4597,7 @@ def test_graph_2(tmpdir): """creating a graph, wf with one workflow as a node""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) wf.set_output([("out", wf.wfnd.lzout.out)]) @@ -4631,7 +4631,7 @@ def test_graph_2st(tmpdir): """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) wfnd = Workflow(name="wfnd", input_spec=["x"]).split("x", x=wf.lzin.x) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) wf.set_output([("out", wf.wfnd.lzout.out)]) @@ -4664,10 +4664,10 @@ def test_graph_2st(tmpdir): def test_graph_3(tmpdir): """creating a set of graphs, wf with two nodes (one node is a workflow)""" wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) wf.set_output([("out", wf.wfnd.lzout.out)]) @@ -4706,10 +4706,10 @@ def test_graph_3st(tmpdir): (blue node and a wfasnd, and blue arrow from the node to the wfasnd) """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) + wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) wf.set_output([("out", wf.wfnd.lzout.out)]) @@ -4747,10 +4747,10 @@ def test_graph_4(tmpdir): inside). Connection from the node to the inner workflow. 
""" wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_b", x=wfnd.add2_a.lzout.out)) + wfnd.add(Add2(name="add2_a", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2_b", x=wfnd.add2_a.lzout.out)) wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) wf.add(wfnd) wf.set_output([("out", wf.wfnd.lzout.out)]) @@ -4791,11 +4791,11 @@ def test_graph_5(tmpdir): """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(add2(name="add2_b", x=wfnd.add2_a.lzout.out)) + wfnd.add(Add2(name="add2_a", x=wfnd.lzin.x)) + wfnd.add(Add2(name="add2_b", x=wfnd.add2_a.lzout.out)) wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) wf.add(wfnd) - wf.add(multiply(name="mult", x=wf.wfnd.lzout.out, y=wf.lzin.y)) + wf.add(Multiply(name="mult", x=wf.wfnd.lzout.out, y=wf.lzin.y)) wf.set_output([("out", wf.mult.lzout.out)]) # simple graph @@ -4955,14 +4955,14 @@ def test_wf_state_arrays(): ) wf.add( # Split over workflow input "x" on "scalar" input - list_mult_sum( + ListMultSum( in_list=wf.lzin.x, name="A", ).split(scalar=wf.lzin.x) ) wf.add( # Workflow is still split over "x", combined over "x" on out - list_mult_sum( + ListMultSum( name="B", scalar=wf.A.lzout.sum, in_list=wf.A.lzout.products, @@ -4970,7 +4970,7 @@ def test_wf_state_arrays(): ) wf.add( # Workflow " - list_mult_sum( + ListMultSum( name="C", scalar=wf.lzin.y, in_list=wf.B.lzout.sum, @@ -4978,7 +4978,7 @@ def test_wf_state_arrays(): ) wf.add( # Workflow is split again, this time over C.products - list_mult_sum( + ListMultSum( name="D", in_list=wf.lzin.x, ) @@ -4987,7 +4987,7 @@ def test_wf_state_arrays(): ) wf.add( # Workflow is finally combined again into a single node - list_mult_sum(name="E", scalar=wf.lzin.y, in_list=wf.D.lzout.sum) + ListMultSum(name="E", scalar=wf.lzin.y, in_list=wf.D.lzout.sum) ) wf.set_output([("alpha", wf.E.lzout.sum), ("beta", wf.E.lzout.products)]) @@ -5005,7 +5005,7 @@ def test_wf_input_output_typing(): ) with pytest.raises(TypeError) as exc_info: - list_mult_sum( + ListMultSum( scalar=wf.lzin.y, in_list=wf.lzin.y, name="A", @@ -5013,7 +5013,7 @@ def test_wf_input_output_typing(): exc_info_matches(exc_info, "Cannot coerce into ") wf.add( # Split over workflow input "x" on "scalar" input - list_mult_sum( + ListMultSum( scalar=wf.lzin.x, in_list=wf.lzin.y, name="A", diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 8d0435cb25..9ea06ed60d 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -56,12 +56,12 @@ def result_submitter(shell_task, plugin): @python.define -def op_4var(a, b, c, d) -> str: +def Op4Var(a, b, c, d) -> str: return f"{a} {b} {c} {d}" @python.define -def fun_addtwo(a: int) -> int: +def FunAddTwo(a: int) -> int: import time time.sleep(1) @@ -71,7 +71,7 @@ def fun_addtwo(a: int) -> int: @python.define -def fun_addtwo_notype(a): +def FunAddTwoNoType(a): import time time.sleep(1) @@ -81,7 +81,7 @@ def fun_addtwo_notype(a): @python.define -def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: +def FunAddTwoWithThreadCount(a: int, sgeThreads: int = 1) -> int: import time time.sleep(1) @@ -91,24 +91,22 @@ def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> 
int: @python.define -def fun_addvar( - a: ty.Union[int, float], b: ty.Union[int, float] -) -> ty.Union[int, float]: +def FunAddVar(a: ty.Union[int, float], b: ty.Union[int, float]) -> ty.Union[int, float]: return a + b @python.define -def fun_addvar_notype(a, b): +def FunAddVarNoType(a, b): return a + b @python.define(outputs={"sum": float, "sub": float}) -def fun_addsubvar(a: float, b: float): +def FunAddSubVar(a: float, b: float): return a + b, a - b @python.define -def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: +def FunAddVarNone(a: int, b: ty.Optional[int]) -> int: if b is None: return a else: @@ -116,127 +114,127 @@ def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: @python.define -def fun_addvar_default(a: int, b: int = 1) -> int: +def FunAddVarDefault(a: int, b: int = 1) -> int: return a + b @python.define -def fun_addvar_default_notype(a, b=1): +def FunAddVarDefaultNoType(a, b=1): return a + b @python.define -def fun_addvar3(a: int, b: int, c: int) -> int: +def FunAddVar3(a: int, b: int, c: int) -> int: return a + b + c @python.define -def fun_addvar4(a: int, b: int, c: int, d: int) -> int: +def FunAddVar4(a: int, b: int, c: int, d: int) -> int: return a + b + c + d @python.define -def moment(lst: ty.List[float], n: float) -> float: +def Moment(lst: ty.List[float], n: float) -> float: return sum([i**n for i in lst]) / len(lst) @python.define -def fun_div(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: +def FunDiv(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: return a / b @python.define -def multiply(x: int, y: int) -> int: +def Multiply(x: int, y: int) -> int: return x * y @python.define -def multiply_list(x: list, y: int) -> list: +def MultiplyList(x: list, y: int) -> list: return x * y @python.define -def multiply_mixed(x: list, y: int) -> list: +def MultiplyMixed(x: list, y: int) -> list: return x * y @python.define -def add2(x: int) -> int: +def Add2(x: int) -> int: if x == 1 or x == 12: time.sleep(1) return x + 2 @python.define -def raise_xeq1(x: int) -> int: +def RaiseXeq1(x: int) -> int: if x == 1: raise Exception("x is 1, so i'm raising an exception!") return x @python.define(outputs={"out_add": float, "out_sub": float}) -def add2_sub2_res(res): +def Add2Sub2Res(res): """function that takes entire output as an input""" return res["out"] + 2, res["out"] - 2 @python.define(outputs={"out_add": ty.List[float], "out_sub": ty.List[float]}) -def add2_sub2_res_list(res): +def Add2Sub2ResList(res): """function that takes entire output as an input""" return [r["out"] + 2 for r in res], [r["out"] - 2 for r in res] @python.define -def power(a: int, b: int) -> int: +def Power(a: int, b: int) -> int: return a**b @python.define -def identity(x): +def Identity(x): return x @python.define(outputs={"out1": ty.Any, "out2": ty.Any}) -def identity_2flds(x1, x2): +def Identity2Flds(x1, x2): return x1, x2 @python.define -def ten(x) -> int: +def Ten(x) -> int: return 10 @python.define -def add2_wait(x: int) -> int: +def Add2Wait(x: int) -> int: time.sleep(2) return x + 2 @python.define -def list_output(x: int) -> ty.List[int]: +def ListOutput(x: int) -> ty.List[int]: return [x, 2 * x, 3 * x] @python.define -def list_sum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: +def ListSum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: return sum(x) @python.define -def fun_dict(d: dict) -> str: +def FunDict(d: dict) -> str: kv_list = [f"{k}:{v}" for (k, v) in d.items()] return "_".join(kv_list) @python.define -def 
fun_write_file(filename: Path, text="hello") -> File: +def FunWriteFile(filename: Path, text="hello") -> File: with open(filename, "w") as f: f.write(text) return File(filename) @python.define -def fun_write_file_list( +def FunWriteFileList( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.List[File]: for ii, filename in enumerate(filename_list): @@ -247,7 +245,7 @@ def fun_write_file_list( @python.define -def fun_write_file_list2dict( +def FunWriteFileList2Dict( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" ) -> ty.Dict[str, ty.Union[File, int]]: filename_dict = {} @@ -261,14 +259,14 @@ def fun_write_file_list2dict( @python.define -def fun_file(filename: File): +def FunFile(filename: File): with open(filename) as f: txt = f.read() return txt @python.define -def fun_file_list(filename_list: ty.List[File]): +def FunFileList(filename_list: ty.List[File]): txt_list = [] for filename in filename_list: with open(filename) as f: @@ -276,79 +274,36 @@ def fun_file_list(filename_list: ty.List[File]): return " ".join(txt_list) -def gen_basic_wf(name="basic-wf"): - """ - Generates `Workflow` of two tasks - - Task Input - ---------- - x : int (5) - - Task Output - ----------- - out : int (9) - """ - - @workflow.define(outputs=["out"]) - def Workflow(x): - task1 = workflow.add(fun_addtwo(a=x, b=0)) - task2 = workflow.add(fun_addvar(a=task1.out, b=2)) - return task2.out +@workflow.define(outputs=["out"]) +def BasicWorkflow(x): + task1 = workflow.add(FunAddTwo(a=x, b=0)) + task2 = workflow.add(FunAddVar(a=task1.out, b=2)) + return task2.out - return Workflow(x=5) +@workflow.define(outputs=["out"]) +def BasicWorkflowWithThreadCount(x): + task1 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=4)) + task2 = workflow.add(FunAddVar(a=task1.out, b=2)) + return task2.out -def gen_basic_wf_with_threadcount(name="basic-wf-with-threadcount"): - """ - Generates `Workflow` of two tasks - - Task Input - ---------- - x : int (5) - - Task Output - ----------- - out : int (9) - """ - - @workflow.define(outputs=["out"]) - def Workflow(x): - task1 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=4)) - task2 = workflow.add(fun_addvar(a=task1.out, b=2)) - return task2.out - - return Workflow(x=5) - - -def gen_basic_wf_with_threadcount_concurrent(name="basic-wf-with-threadcount"): - """ - Generates `Workflow` of two tasks - - Task Input - ---------- - x : int (5) - - Task Output - ----------- - out : int (9) - """ - @workflow.define(outputs=["out1", "out2"]) - def Workflow(x): - task1_1 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=4)) - task1_2 = workflow.add(fun_addtwo_with_threadcount(a=x, sgeThreads=2)) - task2 = workflow.add(fun_addvar(a=task1_1.out, b=2)) - return task2.out, task1_2.out +@workflow.define(outputs=["out1", "out2"]) +def BasicWorkflowWithThreadCountConcurrent(x): + task1_1 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=4)) + task1_2 = workflow.add(FunAddTwoWithThreadCount(a=x, sgeThreads=2)) + task2 = workflow.add(FunAddVar(a=task1_1.out, b=2)) + return task2.out, task1_2.out - return Workflow(x=5) + # return Workflow(x=5) @python.define(outputs={"sum": int, "products": ty.List[int]}) -def list_mult_sum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: +def ListMultSum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: products = [scalar * x for x in in_list] return functools.reduce(operator.add, products, 0), products @python.define(outputs={"x": str, "y": int, "z": float}) -def foo(a: str, b: 
int, c: float) -> ty.Tuple[str, int, float]: +def Foo(a: str, b: int, c: float) -> ty.Tuple[str, int, float]: return a, b, c diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 7e9328cdd0..e76b932748 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -574,13 +574,23 @@ def dump_ast(node: ast.AST) -> bytes: node, annotate_fields=False, include_attributes=False ).encode() + def strip_annotations(node: ast.AST): + """Remove annotations from function arguments.""" + for arg in node.args.args: + arg.annotation = None + for arg in node.args.kwonlyargs: + arg.annotation = None + if node.args.vararg: + node.args.vararg.annotation = None + if node.args.kwarg: + node.args.kwarg.annotation = None + indent = re.match(r"(\s*)", src).group(1) if indent: src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) func_ast = ast.parse(src).body[0] + strip_annotations(func_ast) yield dump_ast(func_ast.args) - if func_ast.returns: - yield dump_ast(func_ast.returns) for stmt in func_ast.body: yield dump_ast(stmt) yield b")" diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000000..8114205dd8 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +addopts = -vv From d584074e6962c45eff9b0d71a9ed30b7bf364e55 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 17:17:21 +1100 Subject: [PATCH 163/342] debugged test_hash and test_typing --- pydra/engine/core.py | 28 ++++-- pydra/engine/helpers.py | 29 ++++-- pydra/engine/specs.py | 1 + pydra/engine/submitter.py | 9 +- pydra/engine/tests/test_helpers.py | 17 ++-- pydra/utils/tests/test_hash.py | 8 +- pydra/utils/tests/test_typing.py | 147 +++++++++-------------------- pydra/utils/tests/utils.py | 13 ++- pydra/utils/typing.py | 6 ++ 9 files changed, 119 insertions(+), 139 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 12c2c96965..058fed232f 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -357,7 +357,11 @@ def run(self, rerun: bool = False): self._populate_filesystem() os.chdir(self.output_dir) result = Result( - outputs=None, runtime=None, errored=False, output_dir=self.output_dir + outputs=None, + runtime=None, + errored=False, + output_dir=self.output_dir, + definition=self.definition, ) self.hooks.pre_run_task(self) self.audit.start_audit(odir=self.output_dir) @@ -409,7 +413,11 @@ async def run_async(self, rerun: bool = False) -> Result: cwd = os.getcwd() self._populate_filesystem() result = Result( - outputs=None, runtime=None, errored=False, output_dir=self.output_dir + outputs=None, + runtime=None, + errored=False, + output_dir=self.output_dir, + definition=self.definition, ) self.hooks.pre_run_task(self) self.audit.start_audit(odir=self.output_dir) @@ -506,7 +514,11 @@ def result(self, return_inputs=False): """ if self.errored: return Result( - outputs=None, runtime=None, errored=True, output_dir=self.output_dir + outputs=None, + runtime=None, + errored=True, + output_dir=self.output_dir, + definition=self.definition, ) checksum = self.checksum @@ -801,8 +813,8 @@ def _create_graph( # adding an edge to the graph if task id expecting output from a different task if lf.name != self.name: # checking if the connection is already in the graph - if (self[lf.name], node) not in graph.edges: - graph.add_edges((self[lf.name], node)) + if (graph.node(lf.name), node) not in graph.edges: + graph.add_edges((graph.node(lf.name), node)) if detailed: graph.add_edges_description( (node.name, field.name, lf.name, lf.field) @@ -810,8 +822,8 @@ def _create_graph( logger.debug("Connecting 
%s to %s", lf.name, node.name) # adding a state from the previous task to other_states if ( - self[lf.name].state - and self[lf.name].state.splitter_rpn_final + graph.node(lf.name).state + and graph.node(lf.name).state.splitter_rpn_final ): # variables that are part of inner splitters should be # treated as a containers @@ -823,7 +835,7 @@ def _create_graph( # adding task_name: (task.state, [a field from the connection] if lf.name not in other_states: other_states[lf.name] = ( - self[lf.name].state, + graph.node(lf.name).state, [field.name], ) else: diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index df27ebeecd..24217cc951 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -18,7 +18,7 @@ from fileformats.core import FileSet if ty.TYPE_CHECKING: - from .specs import TaskDef, Result + from .specs import TaskDef, Result, WorkflowOutputs from .core import Task from pydra.design.base import Field @@ -149,6 +149,7 @@ def save( task : :class:`~pydra.engine.core.TaskBase` Task to pickle and write """ + from pydra.engine.core import is_workflow if task is None and result is None: raise ValueError("Nothing to be saved") @@ -162,9 +163,15 @@ def save( lockfile = task_path.parent / (task_path.name + "_save.lock") with SoftFileLock(lockfile): if result: - if task_path.name.startswith("Workflow") and result.outputs is not None: + if ( + result.definition + and is_workflow(result.definition) + and result.outputs is not None + ): # copy files to the workflow directory - result.outputs = copyfile_workflow(wf_path=task_path, result=result) + result.outputs = copyfile_workflow( + wf_path=task_path, outputs=result.outputs + ) with (task_path / f"{name_prefix}_result.pklz").open("wb") as fp: cp.dump(result, fp) if task: @@ -172,17 +179,19 @@ def save( cp.dump(task, fp) -def copyfile_workflow(wf_path: os.PathLike, result: "Result") -> "Result": +def copyfile_workflow( + wf_path: os.PathLike, outputs: "WorkflowOutputs" +) -> "WorkflowOutputs": """if file in the wf results, the file will be copied to the workflow directory""" from .helpers_file import copy_nested_files - for field in attrs_fields(result.outputs): - value = getattr(result.outputs, field.name) + for field in attrs_fields(outputs): + value = getattr(outputs, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run # to move all files and directories to the workflow directory new_value = copy_nested_files(value, wf_path, mode=FileSet.CopyMode.hardlink) - setattr(result.outputs, field.name, new_value) - return result + setattr(outputs, field.name, new_value) + return outputs def gather_runtime_info(fname): @@ -457,7 +466,7 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: etype, eval, etr = sys.exc_info() traceback = format_exception(etype, eval, etr) errorfile = record_error(task_pkl.parent, error=traceback) - result = Result(output=None, runtime=None, errored=True) + result = Result(output=None, runtime=None, errored=True, definition=None) save(task_pkl.parent, result=result) raise @@ -472,7 +481,7 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: traceback = format_exception(etype, eval, etr) errorfile = record_error(task.output_dir, error=traceback) if not resultfile.exists(): # not sure if this is needed - result = Result(output=None, runtime=None, errored=True) + result = Result(output=None, runtime=None, errored=True, definition=None) save(task.output_dir, result=result) e.add_note(f" full crash report is here: {errorfile}") raise diff --git 
a/pydra/engine/specs.py b/pydra/engine/specs.py index d20e524409..438b14dfea 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -519,6 +519,7 @@ class Result(ty.Generic[OutputsType]): outputs: OutputsType | None = None runtime: Runtime | None = None errored: bool = False + definition: TaskDef[OutputsType] | None = None def __getstate__(self): state = attrs_values(self) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 60a949f752..fbadf83dd8 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -31,6 +31,7 @@ from .node import Node from .specs import TaskDef, WorkflowDef from .environments import Environment + from .state import State DefType = ty.TypeVar("DefType", bound="TaskDef") @@ -456,7 +457,7 @@ def __init__( self.queued = {} self.running = {} # Not used in logic, but may be useful for progress tracking self.unrunnable = defaultdict(list) - self.state_names = self.node.state.names + self.state_names = self.node.state.names if self.node.state else [] self.workflow_inputs = workflow_inputs self.graph = None @@ -468,6 +469,10 @@ def inputs(self) -> "Node.Inputs": def _definition(self) -> "Node": return self.node._definition + @property + def state(self) -> "State": + return self.node.state + @property def tasks(self) -> ty.Iterable["Task[DefType]"]: if self._tasks is None: @@ -535,7 +540,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: yield Task( definition=self.node._definition._resolve_lazy_inputs( workflow_inputs=self.workflow_inputs, - exec_graph=self.graph, + graph=self.graph, state_index=None, ), submitter=self.submitter, diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 78c726e082..b71542beb1 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -7,6 +7,9 @@ import pytest import cloudpickle as cp from unittest.mock import Mock +from pydra.engine.submitter import Submitter +from pydra.engine.specs import Result +from pydra.engine.core import Task from fileformats.generic import Directory, File from fileformats.core import FileSet from .utils import Multiply, RaiseXeq1 @@ -24,24 +27,24 @@ def test_save(tmpdir): outdir = Path(tmpdir) with pytest.raises(ValueError): save(tmpdir) - foo = Multiply(name="mult", x=1, y=2) + foo = Task(name="mult", definition=Multiply(x=1, y=2), submitter=Submitter()) # save task save(outdir, task=foo) del foo # load saved task task_pkl = outdir / "_task.pklz" - foo = cp.loads(task_pkl.read_bytes()) + foo: Task = cp.loads(task_pkl.read_bytes()) assert foo.name == "mult" - assert foo.inputs.x == 1 and foo.inputs.y == 2 + assert foo.inputs["x"] == 1 and foo.inputs["y"] == 2 # execute task and save result - res = foo() - assert res.output.out == 2 + res: Result = foo.run() + assert res.outputs.out == 2 save(outdir, result=res) del res # load saved result res_pkl = outdir / "_result.pklz" - res = cp.loads(res_pkl.read_bytes()) - assert res.output.out == 2 + res: Result = cp.loads(res_pkl.read_bytes()) + assert res.outputs.out == 2 def test_hash_file(tmpdir): diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index f81b5aedec..c9dad2616e 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -10,7 +10,7 @@ import typing as ty from fileformats.application import Zip, Json from fileformats.text import TextFile -from ..hash import ( +from pydra.utils.hash import ( Cache, bytes_repr, hash_object, @@ -190,9 +190,9 @@ def test_bytes_repr_type2(): class 
MyClass(ty.Generic[T]): pass - obj_repr = join_bytes_repr(MyClass[int]) - assert ( - obj_repr == b"type:(pydra.utils.tests.test_hash.MyClass[type:(builtins.int)])" + obj_repr = join_bytes_repr(MyClass[int]).decode() + assert re.match( + r"type:\([\w\.]*test_hash.MyClass\[type:\(builtins.int\)\]\)", obj_repr ) diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index c4c0dd1208..06747f9bba 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -4,18 +4,20 @@ import typing as ty from pathlib import Path import tempfile +from unittest.mock import Mock import pytest from pydra.design import python from fileformats.generic import File from pydra.engine.lazy import LazyOutField +from pydra.design import workflow from ..typing import TypeParser, MultiInputObj from fileformats.application import Json, Yaml, Xml from .utils import ( - generic_func_task, + GenericFuncTask, GenericShellTask, - specific_func_task, + SpecificFuncTask, SpecificShellTask, - other_specific_func_task, + OtherSpecificFuncTask, OtherSpecificShellTask, MyFormatX, MyOtherFormatX, @@ -26,7 +28,8 @@ def lz(tp: ty.Type): """convenience method for creating a LazyField of type 'tp'""" - return LazyOutField(name="foo", field="boo", type=tp) + node = Mock() + return LazyOutField(node=node, field="boo", type=tp) PathTypes = ty.Union[str, os.PathLike] @@ -520,26 +523,22 @@ def test_type_coercion_realistic(): Path.touch(yet_another_file) file_list = [File(p) for p in (a_file, another_file, yet_another_file)] - @python.define - @mark.annotate({"return": {"a": ty.List[File], "b": ty.List[str]}}) + @python.define(outputs={"a": ty.List[File], "b": ty.List[str]}) def f(x: ty.List[File], y: ty.Dict[str, ty.List[File]]): return list(itertools.chain(x, *y.values())), list(y.keys()) - task = f(x=file_list, y={"a": file_list[1:]}) + defn = f(x=file_list, y={"a": file_list[1:]}) + outputs = defn() - TypeParser(ty.List[str])(task.lzout.a) # pylint: disable=no-member + TypeParser(ty.List[str])(outputs.a) # pylint: disable=no-member with pytest.raises( TypeError, + match=r"Incorrect type for field:", ) as exc_info: - TypeParser(ty.List[int])(task.lzout.a) # pylint: disable=no-member - assert exc_info_matches( - exc_info, - match=r"Cannot coerce into ", - regex=True, - ) + TypeParser(ty.List[int])(outputs.a) # pylint: disable=no-member with pytest.raises(TypeError) as exc_info: - task.inputs.x = "bad-value" + defn.x = "bad-value" assert exc_info_matches( exc_info, match="Cannot coerce 'bad-value' into " ) @@ -682,9 +681,9 @@ def test_type_matches(): @pytest.fixture(params=["func", "shell"]) -def generic_task(request): +def GenericTask(request): if request.param == "func": - return generic_func_task + return GenericFuncTask elif request.param == "shell": return GenericShellTask else: @@ -692,9 +691,9 @@ def generic_task(request): @pytest.fixture(params=["func", "shell"]) -def specific_task(request): +def SpecificTask(request): if request.param == "func": - return specific_func_task + return SpecificFuncTask elif request.param == "shell": return SpecificShellTask else: @@ -702,55 +701,29 @@ def specific_task(request): @pytest.fixture(params=["func", "shell"]) -def other_specific_task(request): +def OtherSpecificTask(request): if request.param == "func": - return other_specific_func_task + return OtherSpecificFuncTask elif request.param == "shell": return OtherSpecificShellTask else: assert False -def test_typing_implicit_cast_from_super(tmp_path, generic_task, specific_task): +def 
test_typing_implicit_cast_from_super(tmp_path, GenericTask, SpecificTask): """Check the casting of lazy fields and whether specific file-sets can be recovered from generic `File` classes""" - wf = Workflow( - name="test", - input_spec={"in_file": MyFormatX}, - output_spec={"out_file": MyFormatX}, - ) - - wf.add( - specific_task( - in_file=wf.lzin.in_file, - name="specific1", - ) - ) - - wf.add( # Generic task - generic_task( - in_file=wf.specific1.lzout.out, - name="generic", - ) - ) - - wf.add( - specific_task( - in_file=wf.generic.lzout.out, - name="specific2", - ) - ) - - wf.set_output( - [ - ("out_file", wf.specific2.lzout.out), - ] - ) + @workflow.define(outputs=["out_file"]) + def Workflow(in_file: MyFormatX) -> MyFormatX: + specific1 = workflow.add(SpecificTask(in_file=in_file)) + generic = workflow.add(GenericTask(in_file=specific1.out)) # Generic task + specific2 = workflow.add(SpecificTask(in_file=generic.out), name="specific2") + return specific2.out in_file = MyFormatX.sample() - outputs = wf(in_file=in_file, plugin="serial") + outputs = Workflow(in_file=in_file)() out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX @@ -759,66 +732,38 @@ def test_typing_implicit_cast_from_super(tmp_path, generic_task, specific_task): assert out_file.header.parent != in_file.header.parent -def test_typing_cast(tmp_path, specific_task, other_specific_task): +def test_typing_cast(tmp_path, SpecificTask, OtherSpecificTask): """Check the casting of lazy fields and whether specific file-sets can be recovered from generic `File` classes""" - wf = Workflow( - name="test", - input_spec={"in_file": MyFormatX}, - output_spec={"out_file": MyFormatX}, - ) + @workflow.define(outputs=["out_file"]) + def Workflow(in_file: MyFormatX) -> MyFormatX: + entry = workflow.add(SpecificTask(in_file=in_file)) - wf.add( - specific_task( - in_file=wf.lzin.in_file, - name="entry", - ) - ) + with pytest.raises(TypeError) as exc_info: + # No cast of generic task output to MyFormatX + workflow.add(OtherSpecificTask(in_file=entry.out)) # Generic task + assert exc_info_matches(exc_info, "Cannot coerce") - with pytest.raises(TypeError) as exc_info: - # No cast of generic task output to MyFormatX - wf.add( # Generic task - other_specific_task( - in_file=wf.entry.lzout.out, - name="inner", - ) + inner = workflow.add( # Generic task + OtherSpecificTask(in_file=entry.out.cast(MyOtherFormatX)) ) - assert exc_info_matches(exc_info, "Cannot coerce") - wf.add( # Generic task - other_specific_task( - in_file=wf.entry.lzout.out.cast(MyOtherFormatX), - name="inner", - ) - ) + with pytest.raises(TypeError) as exc_info: + # No cast of generic task output to MyFormatX + workflow.add(SpecificTask(in_file=inner.out)) - with pytest.raises(TypeError) as exc_info: - # No cast of generic task output to MyFormatX - wf.add( - specific_task( - in_file=wf.inner.lzout.out, - name="exit", - ) - ) - assert exc_info_matches(exc_info, "Cannot coerce") + assert exc_info_matches(exc_info, "Cannot coerce") - wf.add( - specific_task( - in_file=wf.inner.lzout.out.cast(MyFormatX), - name="exit", + exit = workflow.add( + SpecificTask(in_file=inner.out.cast(MyFormatX)), name="exit" ) - ) - wf.set_output( - [ - ("out_file", wf.exit.lzout.out), - ] - ) + return exit.out in_file = MyFormatX.sample() - outputs = wf(in_file=in_file, plugin="serial") + outputs = Workflow(in_file=in_file)() out_file: MyFormatX = outputs.out_file assert type(out_file) is MyFormatX diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 
411435234d..e559e371ad 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -1,12 +1,11 @@ -from fileformats.generic import File +from fileformats.generic import File, BinaryFile from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber -from pydra.design import python from pydra.engine.task import ShellTask from pydra.engine import specs from pydra.design import shell, python -class MyFormat(WithMagicNumber, File): +class MyFormat(WithMagicNumber, BinaryFile): ext = ".my" magic_number = b"MYFORMAT" @@ -19,14 +18,14 @@ class MyFormatX(WithSeparateHeader, MyFormat): header_type = MyHeader -class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, File): +class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, BinaryFile): magic_number = b"MYFORMAT" ext = ".my" header_type = MyHeader @python.define -def generic_func_task(in_file: File) -> File: +def GenericFuncTask(in_file: File) -> File: return in_file @@ -52,7 +51,7 @@ class Outputs(specs.ShellOutputs): @python.define -def specific_func_task(in_file: MyFormatX) -> MyFormatX: +def SpecificFuncTask(in_file: MyFormatX) -> MyFormatX: return in_file @@ -77,7 +76,7 @@ class Outputs(specs.ShellOutputs): @python.define -def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: +def OtherSpecificFuncTask(in_file: MyOtherFormatX) -> MyOtherFormatX: return in_file diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 01d9de784f..482d976826 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -623,6 +623,12 @@ def check_coercible(self, source: ty.Any, target: ty.Union[type, ty.Any]): If the object cannot be coerced into the target type depending on the explicit inclusions and exclusions set in the `coercible` and `not_coercible` member attrs """ + if ( + isinstance(source, ty.Sequence) + and issubclass(target, generic.FileSet) + and all(isinstance(p, os.PathLike) for p in source) + ): + return True self.check_type_coercible(type(source), target, source_repr=repr(source)) def check_type_coercible( From e015f8cb3fabcab01d2207d706f37cd541c39974 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 17:27:16 +1100 Subject: [PATCH 164/342] fixed test_hash error --- pydra/utils/tests/test_hash.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index c9dad2616e..0730624f1b 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -423,8 +423,7 @@ def __repr__(self): with pytest.raises( TypeError, match=( - "unhashable\nand therefore cannot hash `A()` of type " - "`pydra.utils.tests.test_hash.A`" + r"unhashable\nand therefore cannot hash `A\(\)` of type `.*\.test_hash\.A`" ), ): hash_object(A()) From 61e6439b51708ad44b8c3226f36205a34aa2af13 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 30 Jan 2025 17:32:05 +1100 Subject: [PATCH 165/342] debugging test_tasks --- pydra/engine/tests/test_task.py | 55 +++++++++++++-------------------- 1 file changed, 22 insertions(+), 33 deletions(-) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 761b0fca57..18a11f30c6 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -30,40 +30,32 @@ @python.define -def funaddtwo(a): +def FunAddTwo(a): return a + 2 def test_output(): - nn = funaddtwo(a=3) - res = nn._run() - assert res.output.out == 5 - - -def test_name_conflict(): - """raise error if task name conflicts with a class attribute or method""" - with 
pytest.raises(ValueError) as excinfo1: - funaddtwo(name="split", a=3) - assert "Cannot use names of attributes or methods" in str(excinfo1.value) - with pytest.raises(ValueError) as excinfo2: - funaddtwo(name="checksum", a=3) - assert "Cannot use names of attributes or methods" in str(excinfo2.value) + nn = FunAddTwo(a=3) + outputs = nn() + assert outputs.out == 5 def test_numpy(): """checking if mark.task works for numpy functions""" np = pytest.importorskip("numpy") - fft = mark.annotate({"a": np.ndarray, "return": np.ndarray})(np.fft.fft) - fft = mark.task(fft)() + FFT = python.define(inputs={"a": np.ndarray}, outputs={"out": np.ndarray})( + np.fft.fft + ) + arr = np.array([[1, 10], [2, 20]]) - fft.definition.a = arr - res = fft() - assert np.allclose(np.fft.fft(arr), res.output.out) + fft = FFT(a=arr) + outputs = fft() + assert np.allclose(np.fft.fft(arr), outputs.out) @pytest.mark.xfail(reason="cp.dumps(func) depends on the system/setup, TODO!!") def test_checksum(): - nn = funaddtwo(a=3) + nn = FunAddTwo(a=3) assert ( nn.checksum == "PythonTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" @@ -71,10 +63,8 @@ def test_checksum(): def test_annotated_func(): - @python.define - def testfunc( - a: int, b: float = 0.1 - ) -> ty.NamedTuple("Output", [("out_out", float)]): + @python.define(outputs=["out_out"]) + def testfunc(a: int, b: float = 0.1) -> float: return a + b funky = testfunc(a=1) @@ -89,7 +79,6 @@ def testfunc( assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum outputs = funky() - assert hasattr(result, "output") assert hasattr(outputs, "out_out") assert outputs.out_out == 1.1 @@ -1319,18 +1308,18 @@ def test_shell_cmd(tmpdir): def test_functask_callable(tmpdir): # no submitter or plugin - foo = funaddtwo(a=1) + foo = FunAddTwo(a=1) res = foo() assert res.output.out == 3 assert foo.plugin is None # plugin - bar = funaddtwo(a=2) + bar = FunAddTwo(a=2) res = bar(plugin="cf") assert res.output.out == 4 assert bar.plugin is None - foo2 = funaddtwo(a=3) + foo2 = FunAddTwo(a=3) foo2.plugin = "cf" res = foo2() assert res.output.out == 5 @@ -1338,7 +1327,7 @@ def test_functask_callable(tmpdir): def test_taskhooks_1(tmpdir, capsys): - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) + foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) assert foo.hooks # ensure all hooks are defined for attr in ("pre_run", "post_run", "pre_run_task", "post_run_task"): @@ -1369,7 +1358,7 @@ def myhook(task, *args): del captured # hooks are independent across tasks by default - bar = funaddtwo(name="bar", a=3, cache_dir=tmpdir) + bar = FunAddTwo(name="bar", a=3, cache_dir=tmpdir) assert bar.hooks is not foo.hooks # but can be shared across tasks bar.hooks = foo.hooks @@ -1393,7 +1382,7 @@ def myhook(task, *args): def test_taskhooks_2(tmpdir, capsys): """checking order of the hooks; using task's attributes""" - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) + foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) def myhook_prerun(task, *args): print(f"i. 
prerun hook was called from {task.name}") @@ -1424,7 +1413,7 @@ def myhook_postrun(task, *args): def test_taskhooks_3(tmpdir, capsys): """checking results in the post run hooks""" - foo = funaddtwo(name="foo", a=1, cache_dir=tmpdir) + foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) def myhook_postrun_task(task, result, *args): print(f"postrun task hook, the result is {outputs.out}") @@ -1445,7 +1434,7 @@ def myhook_postrun(task, result, *args): def test_taskhooks_4(tmpdir, capsys): """task raises an error: postrun task should be called, postrun shouldn't be called""" - foo = funaddtwo(name="foo", a="one", cache_dir=tmpdir) + foo = FunAddTwo(name="foo", a="one", cache_dir=tmpdir) def myhook_postrun_task(task, result, *args): print(f"postrun task hook was called, result object is {result}") From 3cc523262f102c9eed73c606e4c1a909b88c7483 Mon Sep 17 00:00:00 2001 From: "Thomas G. Close" Date: Sat, 1 Feb 2025 07:33:20 +1100 Subject: [PATCH 166/342] debugging workflows --- .python-version | 1 + new-docs/source/index.rst | 6 +- ...ution.ipynb => 2-advanced-execution.ipynb} | 0 .../source/tutorial/2-troubleshooting.ipynb | 31 ---- .../source/tutorial/3-troubleshooting.ipynb | 143 ++++++++++++++++++ new-docs/source/tutorial/tst.py | 31 ++-- pydra/design/base.py | 2 +- pydra/design/python.py | 2 +- pydra/design/shell.py | 2 +- pydra/design/workflow.py | 2 +- pydra/engine/core.py | 4 +- pydra/engine/lazy.py | 6 +- pydra/engine/node.py | 4 +- pydra/engine/specs.py | 8 +- pydra/engine/state.py | 4 + pydra/engine/submitter.py | 1 - pydra/utils/hash.py | 4 +- pydra/utils/misc.py | 2 +- pydra/utils/typing.py | 4 +- 19 files changed, 196 insertions(+), 61 deletions(-) create mode 100644 .python-version rename new-docs/source/tutorial/{3-advanced-execution.ipynb => 2-advanced-execution.ipynb} (100%) delete mode 100644 new-docs/source/tutorial/2-troubleshooting.ipynb create mode 100644 new-docs/source/tutorial/3-troubleshooting.ipynb diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000..c10780c628 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13.1 diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index 2b12f4bbc6..a1b3417ea9 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -72,8 +72,8 @@ Execution Learn how to execute existing tasks (including workflows) on different systems * :ref:`Getting started` -* :ref:`Troubleshooting` * :ref:`Advanced execution` +* :ref:`Troubleshooting` Design ~~~~~~ @@ -120,8 +120,8 @@ See the full reference documentation for Pydra :hidden: tutorial/1-getting-started - tutorial/2-troubleshooting - tutorial/3-advanced-execution + tutorial/2-advanced-execution + tutorial/3-troubleshooting .. 
toctree:: :maxdepth: 2 diff --git a/new-docs/source/tutorial/3-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb similarity index 100% rename from new-docs/source/tutorial/3-advanced-execution.ipynb rename to new-docs/source/tutorial/2-advanced-execution.ipynb diff --git a/new-docs/source/tutorial/2-troubleshooting.ipynb b/new-docs/source/tutorial/2-troubleshooting.ipynb deleted file mode 100644 index 1fe08f938a..0000000000 --- a/new-docs/source/tutorial/2-troubleshooting.ipynb +++ /dev/null @@ -1,31 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Troubleshooting\n", - "\n", - "Failures are common in scientific analysis, even for well tested workflows, due to\n", - "the novel nature and of scientific experiments and known artefacts that can occur.\n", - "Therefore, it is always to sanity-check results produced by workflows. When a problem\n", - "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", - "issue occurred.\n", - "\n", - "Work in progress..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb new file mode 100644 index 0000000000..26b82977b7 --- /dev/null +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -0,0 +1,143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Troubleshooting\n", + "\n", + "Failures are common in scientific analysis, even for well tested workflows, due to\n", + "the novel nature and of scientific experiments and known artefacts that can occur.\n", + "Therefore, it is always to sanity-check results produced by workflows. When a problem\n", + "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", + "issue occurred. \n", + "\n", + "If running in debug mode (the default), runtime exceptions will be raised to the\n", + "call shell or debugger. However, when using asynchronous workers the errors will\n", + "be saved in `_error.pklz` pickle files inside the task's cache directory. For\n", + "example, given the following toy example" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev165+g61e6439b.d20250131\n" + ] + }, + { + "ename": "TypeError", + "evalue": "Incorrect type for field in 'x' field of Divide interface : [15.0] is not of type (and cannot be coerced to it)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:262\u001b[0m, in \u001b[0;36mTypeParser.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 262\u001b[0m coerced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:429\u001b[0m, in \u001b[0;36mTypeParser.coerce\u001b[0;34m(self, object_)\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 429\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:411\u001b[0m, in \u001b[0;36mTypeParser.coerce\u001b[0;34m(self, object_)\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 411\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mexpand_and_coerce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobject_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# Defial handling for MultiInputObjects (which are annoying)\u001b[39;00m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:280\u001b[0m, in \u001b[0;36mTypeParser.coerce..expand_and_coerce\u001b[0;34m(obj, pattern)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pattern, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_basic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 281\u001b[0m origin, pattern_args \u001b[38;5;241m=\u001b[39m pattern\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:316\u001b[0m, in \u001b[0;36mTypeParser.coerce..coerce_basic\u001b[0;34m(obj, pattern)\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n\u001b[0;32m--> 316\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_coercible\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m coerce_obj(obj, pattern)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:632\u001b[0m, in \u001b[0;36mTypeParser.check_coercible\u001b[0;34m(self, source, target)\u001b[0m\n\u001b[1;32m 631\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 632\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_type_coercible\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource_repr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:688\u001b[0m, in \u001b[0;36mTypeParser.check_type_coercible\u001b[0;34m(self, source, target, source_repr)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m matches_criteria(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoercible):\n\u001b[0;32m--> 688\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 689\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot coerce \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msource_repr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m into \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtarget\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m as the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 690\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoercion doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt match any of the explicit inclusion criteria: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\n\u001b[1;32m 692\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtype_name(s)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m -> \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtype_name(t)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m s, t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoercible\n\u001b[1;32m 693\u001b[0m )\n\u001b[1;32m 694\u001b[0m )\n\u001b[1;32m 695\u001b[0m matches_not_coercible \u001b[38;5;241m=\u001b[39m matches_criteria(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnot_coercible)\n", + "\u001b[0;31mTypeError\u001b[0m: Cannot coerce [15.0] into in 'x' field of Divide interface as the coercion doesn't match any of the explicit inclusion criteria: Sequence -> Sequence, Mapping -> Mapping, Path -> PathLike, str -> PathLike, PathLike -> Path, PathLike -> str, Any -> MultiInputObj, int -> float, Integer -> float, int -> Decimal, Boolean -> bool, Decimal -> float, Integer -> int, Text -> str, bool -> Boolean, float -> Decimal, int -> Integer, str -> Text, integer -> int, floating -> float, bool -> bool, integer -> float, character -> str, complexfloating -> complex, bytes_ -> bytes, ndarray -> Sequence, Sequence -> ndarray", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 27\u001b[0m\n\u001b[1;32m 24\u001b[0m failing_workflow 
\u001b[38;5;241m=\u001b[39m UnsafeWorkflow(a\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, b\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m)\u001b[38;5;241m.\u001b[39msplit(c\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m2\u001b[39m ,\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter() \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[0;32m---> 27\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfailing_workflow\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:193\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, task_def)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_until_complete(\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun_async(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n\u001b[1;32m 191\u001b[0m )\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_start_time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:160\u001b[0m, in \u001b[0;36mDebugWorker.run\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mrun\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 156\u001b[0m task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 157\u001b[0m rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 158\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 159\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run a task.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:372\u001b[0m, in \u001b[0;36mTask.run\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 372\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefinition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 373\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:706\u001b[0m, in \u001b[0;36mWorkflowDef._run\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_run\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 705\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 706\u001b[0m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand_workflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:242\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m tasks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;129;01mnot\u001b[39;00m n\u001b[38;5;241m.\u001b[39mdone \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m exec_graph\u001b[38;5;241m.\u001b[39mnodes):\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 243\u001b[0m tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_runnable_tasks(exec_graph)\n\u001b[1;32m 244\u001b[0m workflow_task\u001b[38;5;241m.\u001b[39mreturn_values \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mworkflow\u001b[39m\u001b[38;5;124m\"\u001b[39m: wf, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexec_graph\u001b[39m\u001b[38;5;124m\"\u001b[39m: exec_graph}\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:160\u001b[0m, in \u001b[0;36mDebugWorker.run\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mrun\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 156\u001b[0m task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 157\u001b[0m rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 158\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 159\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run a task.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:372\u001b[0m, in \u001b[0;36mTask.run\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 372\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefinition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 373\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:706\u001b[0m, in \u001b[0;36mWorkflowDef._run\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_run\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 705\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 706\u001b[0m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand_workflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:243\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n\u001b[0;32m--> 243\u001b[0m tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_runnable_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexec_graph\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 244\u001b[0m workflow_task\u001b[38;5;241m.\u001b[39mreturn_values \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mworkflow\u001b[39m\u001b[38;5;124m\"\u001b[39m: wf, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexec_graph\u001b[39m\u001b[38;5;124m\"\u001b[39m: exec_graph}\n", + "File 
\u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:397\u001b[0m, in \u001b[0;36mSubmitter.get_runnable_tasks\u001b[0;34m(self, graph)\u001b[0m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m node\u001b[38;5;241m.\u001b[39mstarted:\n\u001b[1;32m 396\u001b[0m not_started\u001b[38;5;241m.\u001b[39madd(node)\n\u001b[0;32m--> 397\u001b[0m tasks\u001b[38;5;241m.\u001b[39mextend(\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_runnable_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_locks(tasks)\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tasks\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:580\u001b[0m, in \u001b[0;36mNodeExecution.get_runnable_tasks\u001b[0;34m(self, graph)\u001b[0m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"For a given node, check to see which tasks have been successfully run, are ready\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \u001b[38;5;124;03mto run, can't be run due to upstream errors, or are blocked on other tasks to complete.\u001b[39;00m\n\u001b[1;32m 565\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;124;03m List of tasks that are ready to run\u001b[39;00m\n\u001b[1;32m 578\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 579\u001b[0m runnable: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtasks\u001b[49m \u001b[38;5;66;03m# Ensure tasks are loaded\u001b[39;00m\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstarted:\n\u001b[1;32m 582\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocked \u001b[38;5;241m=\u001b[39m copy(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks)\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:479\u001b[0m, in \u001b[0;36mNodeExecution.tasks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mtasks\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ty\u001b[38;5;241m.\u001b[39mIterable[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 479\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks \u001b[38;5;241m=\u001b[39m \u001b[43m{\u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate_index\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks\u001b[38;5;241m.\u001b[39mvalues()\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:541\u001b[0m, in \u001b[0;36mNodeExecution._generate_tasks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_generate_tasks\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ty\u001b[38;5;241m.\u001b[39mIterable[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m Task(\n\u001b[0;32m--> 541\u001b[0m definition\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_definition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_resolve_lazy_inputs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 542\u001b[0m \u001b[43m \u001b[49m\u001b[43mworkflow_inputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworkflow_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 543\u001b[0m \u001b[43m \u001b[49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 544\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 546\u001b[0m submitter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubmitter,\n\u001b[1;32m 547\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m 548\u001b[0m )\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, split_defn \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39m_split_definition()\u001b[38;5;241m.\u001b[39mitems():\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:416\u001b[0m, in \u001b[0;36mTaskDef._resolve_lazy_inputs\u001b[0;34m(self, workflow_inputs, graph, state_index)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, LazyOutField):\n\u001b[1;32m 415\u001b[0m resolved[name] \u001b[38;5;241m=\u001b[39m value\u001b[38;5;241m.\u001b[39mget_value(graph, state_index)\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mattrs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevolve\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresolved\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.pyenv/versions/3.13.1/lib/python3.13/site-packages/attr/_make.py:624\u001b[0m, in \u001b[0;36mevolve\u001b[0;34m(*args, **changes)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m init_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m changes:\n\u001b[1;32m 622\u001b[0m changes[init_name] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(inst, attr_name)\n\u001b[0;32m--> 624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mchanges\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m:8\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, x, y, function)\u001b[0m\n\u001b[1;32m 6\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_hashes\u001b[39m\u001b[38;5;124m'\u001b[39m, attr_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_hashes\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mdefault)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m NOTHING:\n\u001b[0;32m----> 8\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[43m__attr_converter_x\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 10\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m, __attr_converter_x(__attr_factory_x()))\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:264\u001b[0m, in \u001b[0;36mTypeParser.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 262\u001b[0m coerced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoerce(obj)\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 264\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 265\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIncorrect type for field\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobj\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m is not of type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m (and cannot be coerced to it)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 267\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m coerced\n", + "\u001b[0;31mTypeError\u001b[0m: Incorrect type for field in 'x' field of Divide interface : [15.0] is not of type (and cannot be coerced to it)" + ] + } + ], + "source": [ + "from pydra.design import python, workflow\n", + "from pydra.engine.submitter import Submitter\n", + "\n", + "@python.define\n", + "def Add(x: float, y: float) -> 
float:\n", + " return x + y\n", + "\n", + "@python.define\n", + "def Subtract(x: float, y: float) -> float:\n", + " return x - y\n", + "\n", + "@python.define\n", + "def Divide(x: float, y: float) -> float:\n", + " return x / y\n", + "\n", + "@workflow.define\n", + "def UnsafeWorkflow(a: float, b: float, c: float) -> float:\n", + " add = workflow.add(Add(x=a, y=b))\n", + " divide = workflow.add(Divide(x=add.out, y=c))\n", + " subtract = workflow.add(Subtract(x=divide.out, y=b))\n", + " return subtract.out\n", + "\n", + "# This workflow will fail because we are trying to divide by 0\n", + "failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2 ,0])\n", + "\n", + "with Submitter() as sub:\n", + " result = sub(failing_workflow)\n", + " \n", + "if result.errored:\n", + " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", + "else:\n", + " print(\"Workflow completed successfully :)\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stray lockfiles while debugging\n", + "\n", + "During the execution of a task, a lockfile is generated to signify that a task is running. If a task/workflow is terminated by an interactive debugger these lockfiles can hang around." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 2a91a6edbf..1a38efcebb 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,16 +1,27 @@ -from pydra.design import python +from pydra.design import python, workflow +from pydra.engine.submitter import Submitter -if __name__ == "__main__": +@python.define +def Add(x: float, y: float) -> float: + return x + y - @python.define - def TenToThePower(p: int) -> int: - return 10**p +@python.define +def Subtract(x: float, y: float) -> float: + return x - y - ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) +@python.define +def Divide(x: float, y: float) -> float: + return x / y - # Run the 5 tasks in parallel split across 3 processes - outputs = ten_to_the_power(worker="cf", n_procs=3, clean_stale_locks=True) +@workflow.define +def UnsafeWorkflow(a: float, b: float, c: float) -> float: + add = workflow.add(Add(x=a, y=b)) + divide = workflow.add(Divide(x=add.out, y=c)) + subtract = workflow.add(Subtract(x=divide.out, y=b)) + return subtract.out - p1, p2, p3, p4, p5 = outputs.out +# This workflow will fail because we are trying to divide by 0 +failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2 ,0]) - print(f"10^5 = {p5}") +with Submitter() as sub: + result = sub(failing_workflow) \ No newline at end of file diff --git a/pydra/design/base.py b/pydra/design/base.py index a1f5d4d6cf..9d26effb8f 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -5,7 +5,7 @@ import enum from pathlib import Path from copy import copy -from typing_extensions import Self +from typing import Self import attrs.validators from attrs.converters import default_if_none from fileformats.generic import File diff --git a/pydra/design/python.py b/pydra/design/python.py index 7fcbb7af65..9dde3098f7 
100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -1,6 +1,6 @@ import typing as ty import inspect -from typing_extensions import dataclass_transform +from typing import dataclass_transform import attrs from .base import ( Arg, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 08432ffeb2..c182223653 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -8,7 +8,7 @@ from copy import copy import attrs import builtins -from typing_extensions import dataclass_transform +from typing import dataclass_transform from fileformats.core import from_mime from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 928ef4e879..020cad7622 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -1,6 +1,6 @@ import typing as ty import inspect -from typing_extensions import dataclass_transform +from typing import dataclass_transform import attrs from .base import ( Arg, diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 058fed232f..c4e69974d8 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -14,7 +14,7 @@ import cloudpickle as cp from copy import copy from operator import itemgetter -from typing_extensions import Self +from typing import Self import attrs from filelock import SoftFileLock from pydra.engine.specs import TaskDef, WorkflowDef, TaskOutputs, WorkflowOutputs @@ -25,7 +25,7 @@ from pydra.utils.typing import TypeParser, StateArray from .node import Node from datetime import datetime -from fileformats.generic import FileSet +from fileformats.core import FileSet from .specs import ( RuntimeSpec, Result, diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 9f5538d19c..02d4f30243 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -1,6 +1,6 @@ import typing as ty import abc -from typing_extensions import Self +from typing import Self import attrs from pydra.utils.typing import StateArray from pydra.utils.hash import hash_single @@ -141,6 +141,10 @@ def get_value( from pydra.utils.typing import ( TypeParser, ) # pylint: disable=import-outside-toplevel + from pydra.engine.state import StateIndex + + if state_index is None: + state_index = StateIndex() task = graph.node(self.node.name).task(state_index) _, split_depth = TypeParser.strip_splits(self.type) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 1ad45dd008..8fd2e7ab47 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -154,7 +154,7 @@ def combiner(self): return () return self._state.combiner - def _checksum_states(self, state_index=None): + def _checksum_states(self, state_index: StateIndex = StateIndex()): """ Calculate a checksum for the specific state or all of the states of the task. Replaces state-arrays in the inputs fields with a specific values for states. 
@@ -172,7 +172,7 @@ def _checksum_states(self, state_index=None): # } from pydra.engine.specs import WorkflowDef - if state_index is not None: + if state_index: inputs_copy = copy(self._definition) for key, ind in self.state.inputs_ind[state_index].items(): val = self._extract_input_el( diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 438b14dfea..03854138d7 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -13,10 +13,10 @@ import typing as ty from glob import glob from copy import deepcopy -from typing_extensions import Self +from typing import Self import attrs import cloudpickle as cp -from fileformats.generic import FileSet +from fileformats.core import FileSet from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils.typing import TypeParser from .helpers import ( @@ -407,6 +407,10 @@ def _resolve_lazy_inputs( Self The task definition with all lazy fields resolved """ + from pydra.engine.state import StateIndex + + if state_index is None: + state_index = StateIndex() resolved = {} for name, value in attrs_values(self).items(): if isinstance(value, LazyInField): diff --git a/pydra/engine/state.py b/pydra/engine/state.py index ef65487ca9..1f0bb86918 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -29,6 +29,7 @@ class StateIndex: indices : dict[str, int] a dictionary of indices for each input field """ + indices: OrderedDict[str, int] def __init__(self, indices: dict[str, int] | None = None): # We used ordered dict here to ensure the keys are always in the same order @@ -52,6 +53,9 @@ def __eq__(self, other): def __str__(self): return "__".join(f"{n}-{i}" for n, i in self.indices.items()) + + def __bool__(self): + return bool(self.indices) class State: diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index fbadf83dd8..0d3f71b9ed 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -541,7 +541,6 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: definition=self.node._definition._resolve_lazy_inputs( workflow_inputs=self.workflow_inputs, graph=self.graph, - state_index=None, ), submitter=self.submitter, name=self.node.name, diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index e76b932748..333127a047 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -33,12 +33,12 @@ try: from typing import Protocol except ImportError: - from typing_extensions import Protocol # type: ignore + from typing import Protocol # type: ignore try: from typing import runtime_checkable except ImportError: - from typing_extensions import runtime_checkable # type: ignore + from typing import runtime_checkable # type: ignore try: diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 30d1e29e5b..74b41e9826 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -4,7 +4,7 @@ import inspect import platformdirs import builtins -from pydra._version import __version__ +from pydra.engine._version import __version__ user_cache_dir = Path( platformdirs.user_cache_dir( diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 482d976826..5698a7edac 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -16,7 +16,7 @@ from typing import get_origin, get_args except ImportError: # Python < 3.8 - from typing_extensions import get_origin, get_args # type: ignore + from typing import get_origin, get_args # type: ignore if sys.version_info >= (3, 10): UNION_TYPES = (ty.Union, types.UnionType) @@ -625,7 +625,7 @@ def check_coercible(self, source: ty.Any, target: ty.Union[type, 
ty.Any]): """ if ( isinstance(source, ty.Sequence) - and issubclass(target, generic.FileSet) + and issubclass(target, core.FileSet) and all(isinstance(p, os.PathLike) for p in source) ): return True From d2ab96f37fc9ab78092044a254a2bc4352950e9f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 20:34:27 +0000 Subject: [PATCH 167/342] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- new-docs/source/tutorial/tst.py | 9 +++++++-- pydra/engine/lazy.py | 2 +- pydra/engine/specs.py | 2 +- pydra/engine/state.py | 3 ++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 1a38efcebb..173a3b5e58 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,18 +1,22 @@ from pydra.design import python, workflow from pydra.engine.submitter import Submitter + @python.define def Add(x: float, y: float) -> float: return x + y + @python.define def Subtract(x: float, y: float) -> float: return x - y + @python.define def Divide(x: float, y: float) -> float: return x / y + @workflow.define def UnsafeWorkflow(a: float, b: float, c: float) -> float: add = workflow.add(Add(x=a, y=b)) @@ -20,8 +24,9 @@ def UnsafeWorkflow(a: float, b: float, c: float) -> float: subtract = workflow.add(Subtract(x=divide.out, y=b)) return subtract.out + # This workflow will fail because we are trying to divide by 0 -failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2 ,0]) +failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2, 0]) with Submitter() as sub: - result = sub(failing_workflow) \ No newline at end of file + result = sub(failing_workflow) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 02d4f30243..37ab1f00f5 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -142,7 +142,7 @@ def get_value( TypeParser, ) # pylint: disable=import-outside-toplevel from pydra.engine.state import StateIndex - + if state_index is None: state_index = StateIndex() diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 03854138d7..019151e2e0 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -408,7 +408,7 @@ def _resolve_lazy_inputs( The task definition with all lazy fields resolved """ from pydra.engine.state import StateIndex - + if state_index is None: state_index = StateIndex() resolved = {} diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 1f0bb86918..96c2881189 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -29,6 +29,7 @@ class StateIndex: indices : dict[str, int] a dictionary of indices for each input field """ + indices: OrderedDict[str, int] def __init__(self, indices: dict[str, int] | None = None): @@ -53,7 +54,7 @@ def __eq__(self, other): def __str__(self): return "__".join(f"{n}-{i}" for n, i in self.indices.items()) - + def __bool__(self): return bool(self.indices) From e2337f9bcec616d13ee3ef04e59e0a171b163013 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Feb 2025 16:10:48 +1100 Subject: [PATCH 168/342] moved telemetry check into submitter from task --- .../source/tutorial/3-troubleshooting.ipynb | 173 ++++++++++-------- new-docs/source/tutorial/tst.py | 41 ++--- pydra/engine/core.py | 6 +- pydra/engine/helpers.py | 2 +- pydra/engine/specs.py | 8 + pydra/engine/submitter.py | 7 +- 6 files changed, 128 insertions(+), 109 deletions(-) diff --git 
a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb
index 26b82977b7..99db0e35e2 100644
--- a/new-docs/source/tutorial/3-troubleshooting.ipynb
+++ b/new-docs/source/tutorial/3-troubleshooting.ipynb
@@ -6,11 +6,80 @@
    "source": [
     "# Troubleshooting\n",
     "\n",
-    "Failures are common in scientific analysis, even for well tested workflows, due to\n",
-    "the novel nature and of scientific experiments and known artefacts that can occur.\n",
-    "Therefore, it is always to sanity-check results produced by workflows. When a problem\n",
-    "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n",
-    "issue occurred. \n",
+    "This tutorial steps through techniques to identify errors and pipeline failures, and\n",
+    "avoid common pitfalls."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This is needed to run parallel workflows in Jupyter notebooks\n",
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "## Things to check first\n",
+    "\n",
+    "### Running in *debug* mode\n",
+    "\n",
+    "By default, Pydra will run with the *debug* worker, which executes each task serially\n",
+    "within a single process without use of `async/await` blocks, to allow raised exceptions\n",
+    "to propagate gracefully to the calling code. If you are having trouble with a pipeline,\n",
+    "ensure that `worker=debug` is passed to the submission/execution call (the default).\n",
+    "\n",
+    "\n",
+    "## Enclosing multi-process code within `if __name__ == \"__main__\"`\n",
+    "\n",
+    "If using the concurrent futures worker (`worker=\"cf\"`) on macOS or Windows, then you need\n",
+    "to enclose top-level scripts within `if __name__ == \"__main__\"` blocks, e.g."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pydra.tasks.testing import UnsafeDivisionWorkflow\n",
+    "from pydra.engine.submitter import Submitter\n",
+    "\n",
+    "# This workflow will fail because we are trying to divide by 0\n",
+    "wf = UnsafeDivisionWorkflow(a=10, b=5, denominator=2)\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    with Submitter(worker=\"cf\") as sub:\n",
+    "        result = sub(wf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "\n",
+    "### Remove stray lockfiles\n",
+    "\n",
+    "During the execution of a task, a lockfile is generated to signify that a task is running.\n",
+    "These lockfiles are released after a task completes, either successfully or with an error,\n",
+    "within a *try/finally* block. However, if a task/workflow is terminated by an interactive\n",
+    "debugger the finally block may not be executed, causing stray lockfiles to hang around. This\n",
+    "can cause Pydra to hang waiting for the lock to be released. If you suspect this to be\n",
+    "an issue, and there are no other jobs running, then simply remove all lock files from your\n",
+    "cache directory (e.g. `rm /*.lock`) and re-submit your job.\n",
+    "\n",
+    "If the `clean_stale_locks` flag is set (by default when using the *debug* worker), locks that\n",
+    "were created before the outer task was submitted are removed before the task is run.\n",
+    "However, since these locks could be created by separate submission processes, `clean_stale_locks`\n",
+    "is not switched on by default when using production workers (e.g. 
`cf`, `slurm`, etc...).\n", + "\n", + "## Locating error messages\n", "\n", "If running in debug mode (the default), runtime exceptions will be raised to the\n", "call shell or debugger. However, when using asynchronous workers the errors will\n", @@ -20,82 +89,21 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev165+g61e6439b.d20250131\n" - ] - }, - { - "ename": "TypeError", - "evalue": "Incorrect type for field in 'x' field of Divide interface : [15.0] is not of type (and cannot be coerced to it)", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:262\u001b[0m, in \u001b[0;36mTypeParser.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 262\u001b[0m coerced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcoerce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:429\u001b[0m, in \u001b[0;36mTypeParser.coerce\u001b[0;34m(self, object_)\u001b[0m\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 429\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:411\u001b[0m, in \u001b[0;36mTypeParser.coerce\u001b[0;34m(self, object_)\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 411\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mexpand_and_coerce\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobject_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# Defial handling for MultiInputObjects (which are annoying)\u001b[39;00m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:280\u001b[0m, in \u001b[0;36mTypeParser.coerce..expand_and_coerce\u001b[0;34m(obj, pattern)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(pattern, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m--> 280\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcoerce_basic\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 281\u001b[0m origin, pattern_args \u001b[38;5;241m=\u001b[39m pattern\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:316\u001b[0m, in \u001b[0;36mTypeParser.coerce..coerce_basic\u001b[0;34m(obj, pattern)\u001b[0m\n\u001b[1;32m 315\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n\u001b[0;32m--> 316\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_coercible\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m coerce_obj(obj, pattern)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:632\u001b[0m, in \u001b[0;36mTypeParser.check_coercible\u001b[0;34m(self, source, target)\u001b[0m\n\u001b[1;32m 631\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 632\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcheck_type_coercible\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mtype\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource_repr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:688\u001b[0m, in \u001b[0;36mTypeParser.check_type_coercible\u001b[0;34m(self, source, target, source_repr)\u001b[0m\n\u001b[1;32m 687\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m matches_criteria(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoercible):\n\u001b[0;32m--> 688\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 689\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot coerce \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msource_repr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m into \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtarget\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m as the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 690\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcoercion doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt match any of the explicit inclusion criteria: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(\n\u001b[1;32m 692\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtype_name(s)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m -> \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtype_name(t)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m s, t \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoercible\n\u001b[1;32m 693\u001b[0m )\n\u001b[1;32m 694\u001b[0m )\n\u001b[1;32m 695\u001b[0m matches_not_coercible \u001b[38;5;241m=\u001b[39m matches_criteria(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnot_coercible)\n", - "\u001b[0;31mTypeError\u001b[0m: Cannot coerce [15.0] into in 'x' field of Divide interface as the coercion doesn't match any of the explicit inclusion criteria: Sequence -> Sequence, Mapping -> Mapping, Path -> PathLike, str -> PathLike, PathLike -> Path, PathLike -> str, Any -> MultiInputObj, int -> float, Integer -> float, int -> Decimal, Boolean -> 
bool, Decimal -> float, Integer -> int, Text -> str, bool -> Boolean, float -> Decimal, int -> Integer, str -> Text, integer -> int, floating -> float, bool -> bool, integer -> float, character -> str, complexfloating -> complex, bytes_ -> bytes, ndarray -> Sequence, Sequence -> ndarray", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 27\u001b[0m\n\u001b[1;32m 24\u001b[0m failing_workflow \u001b[38;5;241m=\u001b[39m UnsafeWorkflow(a\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, b\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m)\u001b[38;5;241m.\u001b[39msplit(c\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m2\u001b[39m ,\u001b[38;5;241m0\u001b[39m])\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Submitter() \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[0;32m---> 27\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfailing_workflow\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:193\u001b[0m, in \u001b[0;36mSubmitter.__call__\u001b[0;34m(self, task_def)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mloop\u001b[38;5;241m.\u001b[39mrun_until_complete(\n\u001b[1;32m 190\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun_async(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n\u001b[1;32m 191\u001b[0m )\n\u001b[1;32m 192\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 193\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrun_start_time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:160\u001b[0m, in \u001b[0;36mDebugWorker.run\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mrun\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 156\u001b[0m task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 157\u001b[0m rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 158\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 159\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run a task.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:372\u001b[0m, in \u001b[0;36mTask.run\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 372\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefinition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 373\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:706\u001b[0m, in \u001b[0;36mWorkflowDef._run\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_run\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 705\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 706\u001b[0m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand_workflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:242\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m tasks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;129;01mnot\u001b[39;00m n\u001b[38;5;241m.\u001b[39mdone \u001b[38;5;28;01mfor\u001b[39;00m n \u001b[38;5;129;01min\u001b[39;00m exec_graph\u001b[38;5;241m.\u001b[39mnodes):\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[0;32m--> 242\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 243\u001b[0m tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_runnable_tasks(exec_graph)\n\u001b[1;32m 244\u001b[0m workflow_task\u001b[38;5;241m.\u001b[39mreturn_values \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mworkflow\u001b[39m\u001b[38;5;124m\"\u001b[39m: wf, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexec_graph\u001b[39m\u001b[38;5;124m\"\u001b[39m: exec_graph}\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/workers.py:160\u001b[0m, in \u001b[0;36mDebugWorker.run\u001b[0;34m(self, task, rerun)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m 
\u001b[39m\u001b[38;5;21mrun\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 156\u001b[0m task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 157\u001b[0m rerun: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 158\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mResult\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 159\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run a task.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrerun\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrerun\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/core.py:372\u001b[0m, in \u001b[0;36mTask.run\u001b[0;34m(self, rerun)\u001b[0m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudit\u001b[38;5;241m.\u001b[39mmonitor()\n\u001b[0;32m--> 372\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefinition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 373\u001b[0m result\u001b[38;5;241m.\u001b[39moutputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefinition\u001b[38;5;241m.\u001b[39mOutputs\u001b[38;5;241m.\u001b[39m_from_task(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:706\u001b[0m, in \u001b[0;36mWorkflowDef._run\u001b[0;34m(self, task)\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_run\u001b[39m(\u001b[38;5;28mself\u001b[39m, task: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[WorkflowDef]\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 705\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run the workflow.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 706\u001b[0m \u001b[43mtask\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubmitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexpand_workflow\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:243\u001b[0m, in \u001b[0;36mSubmitter.expand_workflow\u001b[0;34m(self, workflow_task)\u001b[0m\n\u001b[1;32m 241\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m tasks:\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mworker\u001b[38;5;241m.\u001b[39mrun(task, rerun\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrerun)\n\u001b[0;32m--> 243\u001b[0m tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_runnable_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexec_graph\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 244\u001b[0m 
workflow_task\u001b[38;5;241m.\u001b[39mreturn_values \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mworkflow\u001b[39m\u001b[38;5;124m\"\u001b[39m: wf, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexec_graph\u001b[39m\u001b[38;5;124m\"\u001b[39m: exec_graph}\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:397\u001b[0m, in \u001b[0;36mSubmitter.get_runnable_tasks\u001b[0;34m(self, graph)\u001b[0m\n\u001b[1;32m 395\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m node\u001b[38;5;241m.\u001b[39mstarted:\n\u001b[1;32m 396\u001b[0m not_started\u001b[38;5;241m.\u001b[39madd(node)\n\u001b[0;32m--> 397\u001b[0m tasks\u001b[38;5;241m.\u001b[39mextend(\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_runnable_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_locks(tasks)\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tasks\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:580\u001b[0m, in \u001b[0;36mNodeExecution.get_runnable_tasks\u001b[0;34m(self, graph)\u001b[0m\n\u001b[1;32m 563\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"For a given node, check to see which tasks have been successfully run, are ready\u001b[39;00m\n\u001b[1;32m 564\u001b[0m \u001b[38;5;124;03mto run, can't be run due to upstream errors, or are blocked on other tasks to complete.\u001b[39;00m\n\u001b[1;32m 565\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 577\u001b[0m \u001b[38;5;124;03m List of tasks that are ready to run\u001b[39;00m\n\u001b[1;32m 578\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 579\u001b[0m runnable: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m--> 580\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtasks\u001b[49m \u001b[38;5;66;03m# Ensure tasks are loaded\u001b[39;00m\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstarted:\n\u001b[1;32m 582\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblocked \u001b[38;5;241m=\u001b[39m copy(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks)\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:479\u001b[0m, in \u001b[0;36mNodeExecution.tasks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 476\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 477\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mtasks\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ty\u001b[38;5;241m.\u001b[39mIterable[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 479\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks \u001b[38;5;241m=\u001b[39m \u001b[43m{\u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstate_index\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_tasks\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_tasks\u001b[38;5;241m.\u001b[39mvalues()\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/submitter.py:541\u001b[0m, in \u001b[0;36mNodeExecution._generate_tasks\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21m_generate_tasks\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ty\u001b[38;5;241m.\u001b[39mIterable[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTask[DefType]\u001b[39m\u001b[38;5;124m\"\u001b[39m]:\n\u001b[1;32m 539\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 540\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m Task(\n\u001b[0;32m--> 541\u001b[0m definition\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnode\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_definition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_resolve_lazy_inputs\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 542\u001b[0m \u001b[43m \u001b[49m\u001b[43mworkflow_inputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mworkflow_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 543\u001b[0m \u001b[43m \u001b[49m\u001b[43mgraph\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgraph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 544\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 545\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 546\u001b[0m submitter\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msubmitter,\n\u001b[1;32m 547\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39mname,\n\u001b[1;32m 548\u001b[0m )\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 550\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, split_defn \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnode\u001b[38;5;241m.\u001b[39m_split_definition()\u001b[38;5;241m.\u001b[39mitems():\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/engine/specs.py:416\u001b[0m, in \u001b[0;36mTaskDef._resolve_lazy_inputs\u001b[0;34m(self, workflow_inputs, graph, state_index)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, LazyOutField):\n\u001b[1;32m 415\u001b[0m resolved[name] \u001b[38;5;241m=\u001b[39m value\u001b[38;5;241m.\u001b[39mget_value(graph, state_index)\n\u001b[0;32m--> 416\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mattrs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevolve\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresolved\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/.pyenv/versions/3.13.1/lib/python3.13/site-packages/attr/_make.py:624\u001b[0m, in \u001b[0;36mevolve\u001b[0;34m(*args, **changes)\u001b[0m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m init_name \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m changes:\n\u001b[1;32m 622\u001b[0m changes[init_name] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(inst, attr_name)\n\u001b[0;32m--> 624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mchanges\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:8\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, x, y, function)\u001b[0m\n\u001b[1;32m 6\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_hashes\u001b[39m\u001b[38;5;124m'\u001b[39m, attr_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_hashes\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mdefault)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m NOTHING:\n\u001b[0;32m----> 8\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[43m__attr_converter_x\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 10\u001b[0m _setattr(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m, __attr_converter_x(__attr_factory_x()))\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/utils/typing.py:264\u001b[0m, in \u001b[0;36mTypeParser.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 262\u001b[0m coerced \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcoerce(obj)\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 264\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 265\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIncorrect type for field\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel_str\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobj\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m is not of type \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 266\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m (and cannot be coerced to it)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 267\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m coerced\n", - "\u001b[0;31mTypeError\u001b[0m: Incorrect type for field in 'x' field of Divide interface : [15.0] is not of type (and cannot be coerced to it)" - ] - } - ], + 
"outputs": [], "source": [ - "from pydra.design import python, workflow\n", + "from pydra.tasks.testing import UnsafeDivisionWorkflow\n", "from pydra.engine.submitter import Submitter\n", + "import nest_asyncio\n", "\n", - "@python.define\n", - "def Add(x: float, y: float) -> float:\n", - " return x + y\n", - "\n", - "@python.define\n", - "def Subtract(x: float, y: float) -> float:\n", - " return x - y\n", - "\n", - "@python.define\n", - "def Divide(x: float, y: float) -> float:\n", - " return x / y\n", - "\n", - "@workflow.define\n", - "def UnsafeWorkflow(a: float, b: float, c: float) -> float:\n", - " add = workflow.add(Add(x=a, y=b))\n", - " divide = workflow.add(Divide(x=add.out, y=c))\n", - " subtract = workflow.add(Subtract(x=divide.out, y=b))\n", - " return subtract.out\n", + "# This is needed to run parallel workflows in Jupyter notebooks\n", + "nest_asyncio.apply()\n", "\n", "# This workflow will fail because we are trying to divide by 0\n", - "failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2 ,0])\n", + "failing_workflow = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", - "with Submitter() as sub:\n", + "with Submitter(worker=\"cf\") as sub:\n", " result = sub(failing_workflow)\n", " \n", "if result.errored:\n", @@ -108,11 +116,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Stray lockfiles while debugging\n", + "## Tracing upstream issues\n", "\n", - "During the execution of a task, a lockfile is generated to signify that a task is running. If a task/workflow is terminated by an interactive debugger these lockfiles can hang around." + "Failures are common in scientific analysis, even for well tested workflows, due to\n", + "the novel nature and of scientific experiments and known artefacts that can occur.\n", + "Therefore, it is always to sanity-check results produced by workflows. When a problem\n", + "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", + "issue occurred." 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -121,7 +138,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "wf12", "language": "python", "name": "python3" }, @@ -135,7 +152,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.1" + "version": "3.12.5" } }, "nbformat": 4, diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 173a3b5e58..62f5b1e449 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,32 +1,25 @@ -from pydra.design import python, workflow +from pydra.tasks.testing import UnsafeDivisionWorkflow from pydra.engine.submitter import Submitter +# This workflow will fail because we are trying to divide by 0 +wf = UnsafeDivisionWorkflow(a=10, b=5, denominator=2) -@python.define -def Add(x: float, y: float) -> float: - return x + y - - -@python.define -def Subtract(x: float, y: float) -> float: - return x - y - - -@python.define -def Divide(x: float, y: float) -> float: - return x / y +if __name__ == "__main__": + with Submitter(worker="cf") as sub: + result = sub(wf) -@workflow.define -def UnsafeWorkflow(a: float, b: float, c: float) -> float: - add = workflow.add(Add(x=a, y=b)) - divide = workflow.add(Divide(x=add.out, y=c)) - subtract = workflow.add(Subtract(x=divide.out, y=b)) - return subtract.out +# from pydra.tasks.testing import UnsafeDivisionWorkflow +# from pydra.engine.submitter import Submitter +# # This workflow will fail because we are trying to divide by 0 +# failing_workflow = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2, 0]) -# This workflow will fail because we are trying to divide by 0 -failing_workflow = UnsafeWorkflow(a=10, b=5).split(c=[3, 2, 0]) +# if __name__ == "__main__": +# with Submitter(worker="cf") as sub: +# result = sub(failing_workflow) -with Submitter() as sub: - result = sub(failing_workflow) +# if result.errored: +# print("Workflow failed with errors:\n" + str(result.errors)) +# else: +# print("Workflow completed successfully :)") diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c4e69974d8..4fb2f78a8d 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -118,10 +118,6 @@ def __init__( 3. No cache or other process -> start 4. Two or more concurrent new processes get to start """ - from . import check_latest_version - - if Task._etelemetry_version_data is None: - Task._etelemetry_version_data = check_latest_version() if state_index is None: state_index = state.StateIndex() @@ -208,7 +204,7 @@ def checksum(self): if self._checksum is not None: return self._checksum input_hash = self.definition._hash - self._checksum = create_checksum(self.definition.__class__.__name__, input_hash) + self._checksum = create_checksum(self.definition._task_type, input_hash) return self._checksum @property diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 24217cc951..5289ced5c9 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -350,7 +350,7 @@ def create_checksum(name, inputs): String of inputs. 
""" - return "_".join((name, inputs)) + return "-".join((name, inputs)) def record_error(error_path, error): diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 019151e2e0..dda1941aa6 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -117,6 +117,8 @@ def __getitem__(self, name_or_index: str | int) -> ty.Any: class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" + _task_type: str + # The following fields are used to store split/combine state information _splitter = attrs.field(default=None, init=False, repr=False) _combiner = attrs.field(default=None, init=False, repr=False) @@ -621,6 +623,8 @@ def _from_task(cls, task: "Task[PythonDef]") -> Self: class PythonDef(TaskDef[PythonOutputsType]): + _task_type: str = "python" + def _run(self, task: "Task[PythonDef]") -> None: # Prepare the inputs to the function inputs = attrs_values(self) @@ -701,6 +705,8 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: @attrs.define(kw_only=True) class WorkflowDef(TaskDef[WorkflowOutputsType]): + _task_type: str = "workflow" + RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("construct",) _constructed = attrs.field(default=None, init=False) @@ -889,6 +895,8 @@ def _resolve_value( class ShellDef(TaskDef[ShellOutputsType]): + _task_type: str = "shell" + BASE_NAMES = ["additional_args"] additional_args: list[str] = shell.arg( diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 0d3f71b9ed..be51f49db6 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -98,6 +98,11 @@ def __init__( **kwargs, ): + from . import check_latest_version + + if Task._etelemetry_version_data is None: + Task._etelemetry_version_data = check_latest_version() + self.audit = Audit( audit_flags=audit_flags, messengers=messengers, @@ -199,7 +204,7 @@ def Split(defn: TaskDef, output_types: dict): if task.lockfile.exists(): raise RuntimeError( f"Task {task} has a lockfile, but no result was found. " - "This may be due to another submission process queued, or the hard " + "This may be due to another submission that is currently running, or the hard " "interrupt (e.g. a debugging abortion) interrupting a previous run. " f"In the case of an interrupted run, please remove {str(task.lockfile)!r} " "and resubmit." From 30d0a7c97951cbf51e790c07bee6919c954a50f6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Feb 2025 16:11:28 +1100 Subject: [PATCH 169/342] created pydra.task.testing sub-package --- pydra/tasks/testing/__init__.py | 43 +++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 pydra/tasks/testing/__init__.py diff --git a/pydra/tasks/testing/__init__.py b/pydra/tasks/testing/__init__.py new file mode 100644 index 0000000000..6f7d174793 --- /dev/null +++ b/pydra/tasks/testing/__init__.py @@ -0,0 +1,43 @@ +from pydra.design import python, workflow + + +@python.define +def Add(x: float, y: float) -> float: + return x + y + + +@python.define +def Divide(x: float, y: float) -> float: + return x / y + + +@python.define +def Subtract(x: float, y: float) -> float: + return x - y + + +@workflow.define +def UnsafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: + """Adds 'a' and 'b' together, divides by 'denominator', and then subtracts 'b' from + the output. Division by 0 is not guarded against so the workflow will fail if + the value passed to the 'denominator' parameter is 0. + + Parameters + ---------- + a : float + The first number to add. + b : float + The second number to add. 
+ denominator : float + The number to divide the sum of 'a' and 'b' by. + + Returns + ------- + out : float + The result of subtracting 'b' from the result of dividing the sum of 'a' and + 'b' by 'denominator'. + """ + add = workflow.add(Add(x=a, y=b)) + divide = workflow.add(Divide(x=add.out, y=denominator)) + subtract = workflow.add(Subtract(x=divide.out, y=b)) + return subtract.out From 54dc0927c3d206d32cc03e7a468457ce14ba88b5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Feb 2025 22:21:25 +1100 Subject: [PATCH 170/342] working on troubleshooting tutorial --- .../source/tutorial/3-troubleshooting.ipynb | 80 ++++++++++++------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 99db0e35e2..9e44f1a88c 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -10,6 +10,24 @@ "avoid common pitfalls." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Things to check if Pydra gets stuck\n", + "\n", + "I There are a number of common gotchas, related to running multi-process code, that can\n", + "cause Pydra workflows to get stuck and not execute correctly. If using the concurrent\n", + "futures worker (e.g. `worker=\"cf\"`), check these issues first before filing a bug report\n", + "or reaching out for help.\n", + "\n", + "### Applying `nest_asyncio` when running within a notebook\n", + "\n", + "When using the concurrent futures worker within a Jupyter notebook you need to apply\n", + "`nest_asyncio` with the following lines" + ] + }, { "cell_type": "code", "execution_count": 3, @@ -25,21 +43,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "### Enclosing multi-process code within `if __name__ == \"__main__\"`\n", "\n", - "## Things to check first\n", - "\n", - "### Running in *debug* mode\n", - "\n", - "By default, Pydra will run with the *debug* worker, which executes each task serially\n", - "within a single process without use of `async/await` blocks, to allow raised exceptions\n", - "to propagate gracefully to the calling code. If you are having trouble with a pipeline,\n", - "ensure that `worker=debug` is passed to the submission/execution call (the default).\n", - "\n", - "\n", - "## Enclosing multi-process code within `if __name__ == \"__main__\"`\n", - "\n", - "If using the concurrent futures worker (`worker=\"cf\"`) on macOS or Windows, then you need\n", - "to enclose top-level scripts within `if __name__ == \"__main__\"` blocks, e.g." + "If running a script that executes a workflow with the concurrent futures worker\n", + "(i.e. `worker=\"cf\"`) on macOS or Windows, then the submissing/execution call needs to\n", + "be enclosed within a `if __name__ == \"__main__\"` blocks, e.g." ] }, { @@ -63,7 +71,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", "### Remove stray lockfiles\n", "\n", "During the execution of a task, a lockfile is generated to signify that a task is running.\n", @@ -77,14 +84,27 @@ "If the `clean_stale_locks` flag is set (by default when using the *debug* worker), locks that\n", "were created before the outer task was submitted are removed before the task is run.\n", "However, since these locks could be created by separate submission processes, ``clean_stale_locks`\n", - "is not switched on by default when using production workers (e.g. 
`cf`, `slurm`, etc...).\n", + "is not switched on by default when using production workers (e.g. `cf`, `slurm`, etc...)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Finding errors\n", + "\n", + "### Running in *debug* mode\n", + "\n", + "By default, Pydra will run with the *debug* worker, which executes each task serially\n", + "within a single process without use of `async/await` blocks, to allow raised exceptions\n", + "to propagate gracefully to the calling code. If you are having trouble with a pipeline,\n", + "ensure that `worker=debug` is passed to the submission/execution call (the default).\n", "\n", - "## Locating error messages\n", + "### Reading error files\n", "\n", - "If running in debug mode (the default), runtime exceptions will be raised to the\n", - "call shell or debugger. However, when using asynchronous workers the errors will\n", - "be saved in `_error.pklz` pickle files inside the task's cache directory. For\n", - "example, given the following toy example" + "When a task raises an error, it is captured and saved in pickle file named `_error.pklz`\n", + "within task's cache directory. For example, when calling the toy `UnsafeDivisionWorkflow`\n", + "with a `denominator=0`, the task will fail." ] }, { @@ -93,18 +113,11 @@ "metadata": {}, "outputs": [], "source": [ - "from pydra.tasks.testing import UnsafeDivisionWorkflow\n", - "from pydra.engine.submitter import Submitter\n", - "import nest_asyncio\n", - "\n", - "# This is needed to run parallel workflows in Jupyter notebooks\n", - "nest_asyncio.apply()\n", - "\n", "# This workflow will fail because we are trying to divide by 0\n", - "failing_workflow = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", + "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", "with Submitter(worker=\"cf\") as sub:\n", - " result = sub(failing_workflow)\n", + " result = sub(wf)\n", " \n", "if result.errored:\n", " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", @@ -122,7 +135,12 @@ "the novel nature and of scientific experiments and known artefacts that can occur.\n", "Therefore, it is always to sanity-check results produced by workflows. When a problem\n", "occurs in a multi-stage workflow it can be difficult to identify at which stage the\n", - "issue occurred." + "issue occurred.\n", + "\n", + "Currently in Pydra you need to step backwards through the tasks of the workflow, load\n", + "the saved task object and inspect its inputs to find the preceding nodes. If any of the\n", + "inputs that have been generated by previous nodes are not ok, then you should check the\n", + "tasks that generated them in turn." ] }, { From bf1115185dc47861b16e693a79eddd69c5fb16e0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Feb 2025 22:24:42 +1100 Subject: [PATCH 171/342] more touch ups to troubleshooting tutorial --- .../source/tutorial/3-troubleshooting.ipynb | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 9e44f1a88c..460d82d356 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -125,6 +125,13 @@ " print(\"Workflow completed successfully :)\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Work in progress..." 
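In the meantime, a rough sketch of how such an error file could be inspected with `cloudpickle` (the `_error.pklz` filename comes from the text above; the `user_cache_dir / "run-cache"` location is the one used elsewhere in these tutorials and will differ if a custom cache_dir is passed to the submitter, so treat both as illustrative):

import cloudpickle as cp
from pydra.utils import user_cache_dir

# Assumed default location of the run cache (illustrative only; substitute your
# own cache_dir if one was given to the Submitter)
run_cache = user_cache_dir / "run-cache"

for task_cache_dir in run_cache.iterdir():
    error_file = task_cache_dir / "_error.pklz"
    if error_file.exists():
        # The pickled object is whatever the task recorded when it failed;
        # printing it is enough to see the exception and its traceback
        with open(error_file, "rb") as f:
            error = cp.load(f)
        print(f"{task_cache_dir.name}:\n{error}")

As with any pickle, it is safest to load these files with the same Python version that ran the workflow.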
+ ] + }, { "cell_type": "markdown", "metadata": {}, @@ -140,14 +147,27 @@ "Currently in Pydra you need to step backwards through the tasks of the workflow, load\n", "the saved task object and inspect its inputs to find the preceding nodes. If any of the\n", "inputs that have been generated by previous nodes are not ok, then you should check the\n", - "tasks that generated them in turn." + "tasks that generated them in turn.\n", + "\n", + "For example, in the following example if we are not happy with the mask brain that has\n", + "been generated, we can check the mask to see whether it looks sensible by first loading\n", + "the apply mask task and then inspecting its inputs." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Work in progress..." + ] + }, { "cell_type": "markdown", "metadata": {}, From dd606f8f4398eccbf44a493c6da3b77dc0fba3eb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Feb 2025 12:37:06 +1100 Subject: [PATCH 172/342] debugging new syntax in unittests --- .../source/tutorial/3-troubleshooting.ipynb | 115 ++++++++++++++---- new-docs/source/tutorial/tst.py | 2 +- pydra/design/python.py | 4 +- pydra/engine/specs.py | 2 + pydra/engine/tests/test_functions.py | 44 +++---- pydra/tasks/testing/__init__.py | 34 ++++++ 6 files changed, 152 insertions(+), 49 deletions(-) diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 460d82d356..b24d2abe58 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -6,8 +6,8 @@ "source": [ "# Troubleshooting\n", "\n", - "This tutorial steps through tecnhiques to identify errors and pipeline failures, and\n", - "avoid common pitfalls." + "This tutorial steps through tecnhiques to identify errors and pipeline failures, as well\n", + "as avoid common pitfalls setting up executing over multiple processes." ] }, { @@ -45,9 +45,9 @@ "source": [ "### Enclosing multi-process code within `if __name__ == \"__main__\"`\n", "\n", - "If running a script that executes a workflow with the concurrent futures worker\n", - "(i.e. `worker=\"cf\"`) on macOS or Windows, then the submissing/execution call needs to\n", - "be enclosed within a `if __name__ == \"__main__\"` blocks, e.g." + "When running multi-process Python code on macOS or Windows, as is the case when the \n", + "concurrent futures worker is selected (i.e. `worker=\"cf\"`), then scripts that execute\n", + "the forking code need to be enclosed within an `if __name__ == \"__main__\"` block, e.g." ] }, { @@ -71,12 +71,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Remove stray lockfiles\n", + "This allows the secondary processes to import the script without executing it. Without\n", + "such a block Pydra will lock up and not process the workflow. On Linux this is not an\n", + "issue due to the way that processes are forked, but is good practice in any case for\n", + "code portability." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removing stray lockfiles\n", "\n", - "During the execution of a task, a lockfile is generated to signify that a task is running.\n", - "These lockfiles are released after a task completes, either successfully or with an error,\n", - "within a *try/finally* block. 
However, if a task/workflow is terminated by an interactive\n", - "debugger the finally block may not be executed causing stray lockfiles to hang around. This\n", + "When a Pydra task is executed, a lockfile is generated to signify that the task is running.\n", + "Other processes will wait for this lock to be released before attempting to access the\n", + "tasks results. The lockfiles are automatically deleted after a task completes, either\n", + "successfully or with an error, within a *try/finally* block so should run most of the time.\n", + "However, if a task/workflow is terminated by an interactive\n", + "debugger, the finally block may not be executed, leaving stray lockfiles. This\n", "can cause the Pydra to hang waiting for the lock to be released. If you suspect this to be\n", "an issue, and there are no other jobs running, then simply remove all lock files from your\n", "cache directory (e.g. `rm /*.lock`) and re-submit your job.\n", @@ -91,7 +103,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Finding errors\n", + "## Inspecting errors\n", "\n", "### Running in *debug* mode\n", "\n", @@ -116,8 +128,9 @@ "# This workflow will fail because we are trying to divide by 0\n", "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", - "with Submitter(worker=\"cf\") as sub:\n", - " result = sub(wf)\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", " \n", "if result.errored:\n", " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", @@ -129,7 +142,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Work in progress..." + "The error pickle files can be loaded using the `cloudpickle` library, noting that it is\n", + "important to use the same Python version to load the files that was used to run the Pydra\n", + "workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cloudpickle as cp\n", + "\n", + "with open(\"/ 5\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mSubmitter\u001b[49m(worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcf\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[1;32m 6\u001b[0m result \u001b[38;5;241m=\u001b[39m sub(wf)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWorkflow completed successfully, results saved in: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;241m.\u001b[39moutput_dir\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'Submitter' is not defined" + ] + } + ], + "source": [ + "from pydra.tasks.testing import SafeDivisionWorkflow\n", + "\n", + "wf = SafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", + "\n", + "with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", + " \n", + "print(f\"Workflow completed successfully, results saved in: {result.output_dir}\")" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Work in progress..." 
+ "To find the task directory where the issue first surfaced, iterate through every task\n", + "cache dir and check the results for `float(\"inf\")`s" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, - "source": [] + "outputs": [], + "source": [ + "import cloudpickle as cp\n", + "from pydra.utils import user_cache_dir\n", + "\n", + "run_cache = user_cache_dir / \"run-cache\"\n", + "\n", + "for task_cache_dir in run_cache.iterdir():\n", + " with open(task_cache_dir / \"_result.pklz\", \"rb\") as f:\n", + " error = cp.load(f)\n", + " for \n", + " " + ] } ], "metadata": { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 62f5b1e449..921700879d 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -5,7 +5,7 @@ wf = UnsafeDivisionWorkflow(a=10, b=5, denominator=2) if __name__ == "__main__": - with Submitter(worker="cf") as sub: + with Submitter(worker="cf", rerun=True) as sub: result = sub(wf) diff --git a/pydra/design/python.py b/pydra/design/python.py index 9dde3098f7..50b94f387f 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -166,7 +166,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: for i, output in enumerate(parsed_outputs.values()): output.order = i - interface = make_task_def( + defn = make_task_def( PythonDef, PythonOutputs, parsed_inputs, @@ -177,7 +177,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: outputs_bases=outputs_bases, ) - return interface + return defn if wrapped is not None: if not isinstance(wrapped, (ty.Callable, type)): diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index dda1941aa6..477a09ac2b 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -380,6 +380,8 @@ def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: if getattr(field, "container_path", False): continue inp_dict[field.name] = getattr(self, field.name) + # Include the outputs class, just in case any names or types have changed + inp_dict["Outputs"] = self.Outputs hash_cache = Cache() field_hashes = { k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() diff --git a/pydra/engine/tests/test_functions.py b/pydra/engine/tests/test_functions.py index 445fdf6951..5e94e492ad 100644 --- a/pydra/engine/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -182,18 +182,17 @@ def Indirect(a): # Run functions to ensure behavior is unaffected a = random.randint(0, (1 << 32) - 3) - assert Direct(a=a) == Partial(a=a) - assert Direct(a=a) == Indirect(a=a) + assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task assert list_fields(Direct.Outputs) == [ - python.arg(name="sum", type=int), - python.arg(name="sub", type=int), + python.out(name="sum", type=int, order=0), + python.out(name="sub", type=int, order=1), ] def test_invalid_annotation(): - with pytest.raises(TypeError): + with pytest.raises(ValueError, match="Unrecognised input names"): @python.define(inputs={"b": int}) def addtwo(a): @@ -202,50 +201,51 @@ def addtwo(a): def test_annotated_task(): - def square(in_val: float): + @python.define + def Square(in_val: float): return in_val**2 - res = square(in_val=2.0)() - assert res.output.out == 4.0 + outputs = Square(in_val=2.0)() + assert outputs.out == 4.0 def test_return_annotated_task(): @python.define(inputs={"in_val": float}, outputs={"squared": float}) - def square(in_val): + def 
Square(in_val): return in_val**2 - res = square(in_val=2.0)() - assert res.output.squared == 4.0 + outputs = Square(in_val=2.0)() + assert outputs.squared == 4.0 def test_return_halfannotated_annotated_task(): @python.define(inputs={"in_val": float}, outputs={"out": float}) - def square(in_val): + def Square(in_val): return in_val**2 - res = square(in_val=2.0)() - assert res.output.out == 4.0 + outputs = Square(in_val=2.0)() + assert outputs.out == 4.0 def test_return_annotated_task_multiple_output(): @python.define(inputs={"in_val": float}, outputs={"squared": float, "cubed": float}) - def square(in_val): + def Square(in_val): return in_val**2, in_val**3 - res = square(in_val=2.0)() - assert res.output.squared == 4.0 - assert res.output.cubed == 8.0 + outputs = Square(in_val=2.0)() + assert outputs.squared == 4.0 + assert outputs.cubed == 8.0 def test_return_halfannotated_task_multiple_output(): @python.define(inputs={"in_val": float}, outputs=(float, float)) - def square(in_val): + def Square(in_val): return in_val**2, in_val**3 - res = square(in_val=2.0)() - assert res.output.out1 == 4.0 - assert res.output.out2 == 8.0 + outputs = Square(in_val=2.0)() + assert outputs.out1 == 4.0 + assert outputs.out2 == 8.0 diff --git a/pydra/tasks/testing/__init__.py b/pydra/tasks/testing/__init__.py index 6f7d174793..cff67a927c 100644 --- a/pydra/tasks/testing/__init__.py +++ b/pydra/tasks/testing/__init__.py @@ -11,6 +11,13 @@ def Divide(x: float, y: float) -> float: return x / y +@python.define +def SafeDivide(x: float, y: float) -> float: + if y == 0: + return float("inf") + return x / y + + @python.define def Subtract(x: float, y: float) -> float: return x - y @@ -41,3 +48,30 @@ def UnsafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: divide = workflow.add(Divide(x=add.out, y=denominator)) subtract = workflow.add(Subtract(x=divide.out, y=b)) return subtract.out + + +@workflow.define +def SafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: + """Adds 'a' and 'b' together, divides by 'denominator', and then subtracts 'b' from + the output. Division by 0 is not guarded against so the workflow will fail if + the value passed to the 'denominator' parameter is 0. + + Parameters + ---------- + a : float + The first number to add. + b : float + The second number to add. + denominator : float + The number to divide the sum of 'a' and 'b' by. + + Returns + ------- + out : float + The result of subtracting 'b' from the result of dividing the sum of 'a' and + 'b' by 'denominator'. 
+ """ + add = workflow.add(Add(x=a, y=b)) + divide = workflow.add(SafeDivide(x=add.out, y=denominator)) + subtract = workflow.add(Subtract(x=divide.out, y=b)) + return subtract.out From db3cd4cf7c3d22f253c5e3bbc0e180a880082bd5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Feb 2025 12:41:52 +1100 Subject: [PATCH 173/342] removed python output order attribute --- pydra/design/base.py | 2 -- pydra/design/python.py | 7 +----- pydra/design/shell.py | 2 -- pydra/design/tests/test_python.py | 32 ++++++++++++++-------------- pydra/engine/specs.py | 11 ++++------ pydra/engine/tests/test_functions.py | 12 +++++------ 6 files changed, 27 insertions(+), 39 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 9d26effb8f..8a8b8889de 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -267,8 +267,6 @@ class Out(Field): The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional The validator(s) for the field passed through to the attrs.field, by default it is None - order : int - The order of the output in the output list, allows for tuple unpacking of outputs """ pass diff --git a/pydra/design/python.py b/pydra/design/python.py index 50b94f387f..db2d5dd604 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -80,7 +80,7 @@ class out(Out): outputs """ - order: int = attrs.field(default=None) + pass @dataclass_transform( @@ -161,11 +161,6 @@ def make(wrapped: ty.Callable | type) -> PythonDef: name="function", type=ty.Callable, default=function ) - # Set positions for outputs to allow for tuple unpacking - output: out - for i, output in enumerate(parsed_outputs.values()): - output.order = i - defn = make_task_def( PythonDef, PythonOutputs, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c182223653..6fe38e302d 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -125,8 +125,6 @@ class out(Out): to the function), output_dir (task output_dir will be used), stdout, stderr (stdout and stderr of the task will be sent) inputs (entire inputs will be passed) or any input field name (a specific input field will be sent). 
- order : int - The order of the output in the output list, allows for tuple unpacking of outputs """ callable: ty.Callable | None = None diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index ba822b3d2c..dce89dbf04 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -25,7 +25,7 @@ def func(a: int) -> float: python.arg(name="a", type=int), python.arg(name="function", type=ty.Callable, default=func), ] - assert outputs == [python.out(name="out", type=float, order=0)] + assert outputs == [python.out(name="out", type=float)] definition = SampleDef(a=1) outputs = definition() assert outputs.out == 2.0 @@ -48,7 +48,7 @@ def func(a: int, k: float = 2.0) -> float: python.arg(name="function", type=ty.Callable, default=func), python.arg(name="k", type=float, default=2.0), ] - assert outputs == [python.out(name="out", type=float, order=0)] + assert outputs == [python.out(name="out", type=float)] assert SampleDef(a=1)().out == 2.0 assert SampleDef(a=10, k=3.0)().out == 30.0 @@ -72,7 +72,7 @@ def func(a: int) -> float: python.arg(name="function", type=ty.Callable, default=func), ] assert outputs == [ - python.out(name="b", type=Decimal, help="the doubled output", order=0), + python.out(name="b", type=Decimal, help="the doubled output"), ] outputs = SampleDef.Outputs(b=Decimal(2.0)) assert isinstance(outputs.b, Decimal) @@ -96,7 +96,7 @@ def func(a: int) -> int: python.arg(name="a", type=float), python.arg(name="function", type=ty.Callable, default=func), ] - assert outputs == [python.out(name="b", type=float, order=0)] + assert outputs == [python.out(name="b", type=float)] intf = SampleDef(a=1) assert isinstance(intf.a, float) outputs = SampleDef.Outputs(b=2.0) @@ -122,8 +122,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, order=0), - python.out(name="d", type=float, order=1), + python.out(name="c", type=float), + python.out(name="d", type=float), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" SampleDef.Outputs(c=1.0, d=2.0) @@ -153,8 +153,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b", order=0), - python.out(name="d", type=float, help="product of a and b", order=1), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -187,8 +187,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b", order=0), - python.out(name="d", type=float, help="Product of a and b", order=1), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -229,8 +229,8 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b", order=0), - python.out(name="d", type=float, help="Product of a and b", order=1), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert attrs.fields(SampleDef).function.default.__name__ == "SampleDef" @@ -276,8 +276,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b", 
order=0), - python.out(name="d", type=float, help="Product of a and b", order=1), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1) @@ -346,8 +346,8 @@ def function(a, b): ), ] assert outputs == [ - python.out(name="c", type=float, help="Sum of a and b", order=0), - python.out(name="d", type=float, help="Product of a and b", order=1), + python.out(name="c", type=float, help="Sum of a and b"), + python.out(name="d", type=float, help="Product of a and b"), ] assert SampleDef.function.__name__ == "function" SampleDef(a=1, b=2.0) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 477a09ac2b..5fc522baef 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -4,7 +4,6 @@ import re from copy import copy import os -from operator import attrgetter import inspect import itertools import platform @@ -87,6 +86,10 @@ def _get_node(self): f"{self} outputs object is not a lazy output of a workflow node" ) from None + def __iter__(self) -> list[str]: + """The names of the fields in the output object""" + return sorted(f.name for f in attrs_fields(self)) + def __getitem__(self, name_or_index: str | int) -> ty.Any: """Return the value for the given attribute @@ -591,12 +594,6 @@ class RuntimeSpec: class PythonOutputs(TaskOutputs): - def __iter__(self) -> ty.Generator[ty.Any, None, None]: - """Iterate through all the values in the definition, allows for tuple unpacking""" - fields = sorted(attrs_fields(self), key=attrgetter("order")) - for field in fields: - yield getattr(self, field.name) - @classmethod def _from_task(cls, task: "Task[PythonDef]") -> Self: """Collect the outputs of a task from a combination of the provided inputs, diff --git a/pydra/engine/tests/test_functions.py b/pydra/engine/tests/test_functions.py index 5e94e492ad..6008aa4a29 100644 --- a/pydra/engine/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -89,7 +89,7 @@ def Indirect(a): assert non_func_values(Direct(a=a)) == non_func_values(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task - assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int, order=0) + assert list_fields(Direct.Outputs)[0] == python.out(name="out", type=int) def test_annotation_equivalence_2(): @@ -119,8 +119,8 @@ def Indirect(a) -> tuple[int, float]: # checking if the annotation is properly converted to output_spec if used in task assert list_fields(Direct.Outputs) == [ - python.out(name="out1", type=int, order=0), - python.out(name="out2", type=float, order=1), + python.out(name="out1", type=int), + python.out(name="out2", type=float), ] @@ -150,7 +150,7 @@ def Indirect(a): assert hashes(Direct(a=a)) == hashes(Partial(a=a)) == hashes(Indirect(a=a)) # checking if the annotation is properly converted to output_spec if used in task - assert list_fields(Direct.Outputs)[0] == python.out(name="out1", type=int, order=0) + assert list_fields(Direct.Outputs)[0] == python.out(name="out1", type=int) def test_annotation_equivalence_4(): @@ -186,8 +186,8 @@ def Indirect(a): # checking if the annotation is properly converted to output_spec if used in task assert list_fields(Direct.Outputs) == [ - python.out(name="sum", type=int, order=0), - python.out(name="sub", type=int, order=1), + python.out(name="sum", type=int), + python.out(name="sub", type=int), ] From ac800657097ffb2a03e104b743f6507921dac87a Mon Sep 17 00:00:00 2001 From: Tom 
Close Date: Wed, 5 Feb 2025 12:44:15 +1100 Subject: [PATCH 174/342] finished troubleshooting tutorial for now --- new-docs/source/tutorial/3-troubleshooting.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index b24d2abe58..c629b769f9 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -235,8 +235,10 @@ "\n", "for task_cache_dir in run_cache.iterdir():\n", " with open(task_cache_dir / \"_result.pklz\", \"rb\") as f:\n", - " error = cp.load(f)\n", - " for \n", + " result = cp.load(f)\n", + " for field_name in result.outputs:\n", + " if result.outputs[field_name] == float('inf'):\n", + " print(f\"Task {task_cache_dir.name!r} produced an infinite value for {field_name!r}\")\n", " " ] } From 830abb5c3198cbbd20c8468cf736735f04593289 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Feb 2025 15:51:57 +1100 Subject: [PATCH 175/342] finished changes to docs for now. Not all tutorials are working though --- .python-version | 1 - new-docs/source/examples/.python-version | 1 + .../tutorial/2-advanced-execution.ipynb | 39 +++++- .../source/tutorial/3-troubleshooting.ipynb | 5 +- new-docs/source/tutorial/6-workflow.ipynb | 124 ++++++++++++++++++ .../source/tutorial/7-canonical-form.ipynb | 61 ++------- pydra/engine/core.py | 18 ++- pydra/engine/node.py | 2 + pydra/engine/submitter.py | 4 +- 9 files changed, 195 insertions(+), 60 deletions(-) delete mode 100644 .python-version create mode 100644 new-docs/source/examples/.python-version diff --git a/.python-version b/.python-version deleted file mode 100644 index c10780c628..0000000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13.1 diff --git a/new-docs/source/examples/.python-version b/new-docs/source/examples/.python-version new file mode 100644 index 0000000000..2da62ef804 --- /dev/null +++ b/new-docs/source/examples/.python-version @@ -0,0 +1 @@ +wf12 diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index b537a88b08..62d8fd1e49 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -395,9 +395,44 @@ "source": [ "## Environments\n", "\n", - "Work in progress...\n", + "For shell tasks, it is possible to specify that the command runs within a specific\n", + "software environment, such as those provided by software containers (e.g. Docker or Apptainer).\n", + "This is down by providing the environment to the submitter/execution call," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "from pydra.engine.environments import Docker\n", + "\n", + "test_dir = tempfile.mkdtemp()\n", + "\n", + "nifti_file = Nifti1.sample(test_dir, seed=0)\n", "\n", - "See [Containers and Environments](../explanation/environments.rst) for more details." 
+ "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(in_file=nifti_file, operation=\"regrid\", voxel=(0.5,0.5,0.5))\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", + "\n", + "# Print the locations of the output files\n", + "print(\"\\n\".join(str(p) for p in outputs.out_file))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course for this to work Docker needs to work and be configured for\n", + "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", + "See [Containers and Environments](../explanation/environments.rst) for more details on\n", + "how to utilise containers and add support for other software environments." ] }, { diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index c629b769f9..aa0950ea3e 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -128,9 +128,8 @@ "# This workflow will fail because we are trying to divide by 0\n", "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", - "if __name__ == \"__main__\":\n", - " with Submitter(worker=\"cf\") as sub:\n", - " result = sub(wf)\n", + "with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", " \n", "if result.errored:\n", " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index bbff37bf8d..4fa0c00d87 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -414,6 +414,130 @@ " wf.outputs.out1 = mul.out\n", " wf.outputs.out2 = divide.divided" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting software environments per node\n", + "\n", + "The [Advanced execution tutorial](./2-advanced-execution.html) showed how the software\n", + "environment (e.g. Docker container) could be specified for shell tasks by passing the\n", + "`environment` variable to the task execution/submission call. For shell tasks\n", + "within workflows, the software environment used for them is specified when adding\n", + "a new workflow node, i.e." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "The canonical form of specs task definitions, , must inherit from ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 56\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output_conversion\u001b[38;5;241m.\u001b[39mout_file\n\u001b[1;32m 54\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m tempfile\u001b[38;5;241m.\u001b[39mmkdtemp()\n\u001b[0;32m---> 56\u001b[0m nifti_file \u001b[38;5;241m=\u001b[39m \u001b[43mNifti1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 58\u001b[0m wf \u001b[38;5;241m=\u001b[39m ToyMedianThreshold(in_image\u001b[38;5;241m=\u001b[39mnifti_file)\n\u001b[1;32m 60\u001b[0m outputs \u001b[38;5;241m=\u001b[39m wf()\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1049\u001b[0m, in \u001b[0;36mFileSet.sample\u001b[0;34m(cls, dest_dir, seed, stem)\u001b[0m\n\u001b[1;32m 1046\u001b[0m stem \u001b[38;5;241m=\u001b[39m dest_dir\u001b[38;5;241m.\u001b[39mname\n\u001b[1;32m 1047\u001b[0m \u001b[38;5;66;03m# Need to use mock to get an instance in order to use the singledispatch-based\u001b[39;00m\n\u001b[1;32m 1048\u001b[0m \u001b[38;5;66;03m# extra decorator\u001b[39;00m\n\u001b[0;32m-> 1049\u001b[0m fspaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43mSampleFileGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfname_stem\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1053\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(fspaths)\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1082\u001b[0m, in \u001b[0;36mFileSet.sample_data\u001b[0;34m(cls, generator)\u001b[0m\n\u001b[1;32m 1068\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Converts the `generate_sample_data` method into a class method by mocking up\u001b[39;00m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;124;03ma class instance and calling the method on it\u001b[39;00m\n\u001b[1;32m 1070\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;124;03m the generated file-system paths\u001b[39;00m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m mock: FileSet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmock()\n\u001b[0;32m-> 1082\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mmock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_sample_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/extras.py:38\u001b[0m, in \u001b[0;36mextra..decorated\u001b[0;34m(obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m extras \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tp \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mreferenced_types(): \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m extras\u001b[38;5;241m.\u001b[39mappend(\u001b[43mimport_extras_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtp\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dispatch_method(obj, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/utils.py:228\u001b[0m, in \u001b[0;36mimport_extras_module\u001b[0;34m(klass)\u001b[0m\n\u001b[1;32m 226\u001b[0m extras_pypi \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfileformats-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msub_pkg\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-extras\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 228\u001b[0m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras_pkg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo module named \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mextras_pkg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n", + "File \u001b[0;32m:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", + "File 
\u001b[0;32m:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", + "File \u001b[0;32m:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", + "File \u001b[0;32m:995\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", + "File \u001b[0;32m:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n", + "File \u001b[0;32m~/git/workflows/fileformats-medimage/extras/fileformats/extras/medimage/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# flake8: noqa: F401\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_version\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m __version__\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m converters\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dicom\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m diffusion\n", + "File \u001b[0;32m~/git/workflows/fileformats-medimage/extras/fileformats/extras/medimage/converters.py:188\u001b[0m\n\u001b[1;32m 183\u001b[0m lst\u001b[38;5;241m.\u001b[39mappend(file)\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lst\n\u001b[1;32m 187\u001b[0m \u001b[38;5;129;43m@converter\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mMedicalImage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mAnalyze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_ext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mAnalyze\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mext\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m--> 188\u001b[0m \u001b[38;5;129;43m@shell\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefine\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;43;01mclass\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43;01mMrConvert\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;124;43;03m\"\"\"If used correctly, this program can be a very useful workhorse. In addition to converting images between different formats, it can be used to extract specific studies from a data set, extract a specific region of interest, or flip the images. Some of the possible operations are described in more detail below.\u001b[39;49;00m\n\u001b[1;32m 191\u001b[0m \n\u001b[1;32m 192\u001b[0m \u001b[38;5;124;43;03m Note that for both the -coord and -axes options, indexing starts from 0 rather than 1. E.g. 
-coord 3 <#> selects volumes (the fourth dimension) from the series; -axes 0,1,2 includes only the three spatial axes in the output image.\u001b[39;49;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;124;43;03m For more details, see http://www.mrtrix.org/.\u001b[39;49;00m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;124;43;03m \"\"\"\u001b[39;49;00m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutable\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmrconvert\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/design/shell.py:420\u001b[0m, in \u001b[0;36mdefine\u001b[0;34m(wrapped, inputs, outputs, bases, outputs_bases, auto_attribs, name)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(wrapped, (\u001b[38;5;28mtype\u001b[39m, \u001b[38;5;28mstr\u001b[39m)):\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwrapped must be a class or a string, not \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mwrapped\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwrapped\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m make\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/design/shell.py:320\u001b[0m, in \u001b[0;36mdefine..make\u001b[0;34m(wrapped)\u001b[0m\n\u001b[1;32m 318\u001b[0m class_name \u001b[38;5;241m=\u001b[39m klass\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m 319\u001b[0m check_explicit_fields_are_none(klass, inputs, outputs)\n\u001b[0;32m--> 320\u001b[0m parsed_inputs, parsed_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mextract_fields_from_class\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[43m \u001b[49m\u001b[43mShellDef\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mShellOutputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mklass\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_attribs\u001b[49m\n\u001b[1;32m 322\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(wrapped, \u001b[38;5;28mstr\u001b[39m):\n", + "File \u001b[0;32m~/git/workflows/pydra/pydra/design/base.py:347\u001b[0m, in \u001b[0;36mextract_fields_from_class\u001b[0;34m(spec_type, outputs_type, klass, arg_type, out_type, auto_attribs)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fields_dict\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(klass, spec_type):\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe canonical form of 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mspec_type\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m task definitions, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mklass\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, must inherit from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mspec_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 350\u001b[0m )\n\u001b[1;32m 352\u001b[0m inputs \u001b[38;5;241m=\u001b[39m get_fields(klass, arg_type, auto_attribs, input_helps)\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "\u001b[0;31mValueError\u001b[0m: The canonical form of specs task definitions, , must inherit from " + ] + } + ], + "source": [ + "import tempfile\n", + "import numpy as np\n", + "from fileformats.medimage import Nifti1\n", + "from fileformats.medimage_mrtrix3 import (\n", + " ImageFormat as Mif, ImageHeader as Mih, ImageDataFile as Mid\n", + ")\n", + "from pydra.engine.environments import Docker\n", + "from pydra.design import workflow, python\n", + "from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold\n", + "\n", + "@workflow.define(outputs=[\"out_image\"])\n", + "def ToyMedianThreshold(in_image: Nifti1) -> Mif:\n", + " \"\"\"A toy example workflow that\n", + "\n", + " * converts a NIfTI image to MRTrix3 image format with a separate header\n", + " * loads the separate data file and selects the median value\n", + " \"\"\"\n", + "\n", + " input_conversion = workflow.add(\n", + " MrConvert(in_file=in_image, out_file=\"out_file.mih\"),\n", + " name=\"input_conversion\",\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", + " )\n", + "\n", + " @python.define\n", + " def SelectDataFile(in_file: Mih) -> Mid:\n", + " return in_file.data_file\n", + "\n", + " select_data_file = workflow.add(SelectDataFile(in_file=input_conversion.out_file))\n", + "\n", + " @python.define\n", + " def Median(data_file: Mid) -> float:\n", + " data = np.load(data_file)\n", + " return np.median(data)\n", + "\n", + " median = workflow.add(Median(data_file=select_data_file.out))\n", + " threshold = workflow.add(\n", + " MrThreshold(\n", + " in_file=select_data_file.out,\n", + " abs=median.out\n", + " ), \n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"\")\n", + " )\n", + "\n", + " output_conversion = workflow.add(\n", + " MrConvert(in_file=threshold.out_file, out_file=\"out_image.mif\"),\n", + " name=\"output_conversion\",\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", + " )\n", + "\n", + " return output_conversion.out_file\n", + "\n", + "\n", + "test_dir = tempfile.mkdtemp()\n", + "\n", + "nifti_file = Nifti1.sample(test_dir, seed=0)\n", + "\n", + "wf = ToyMedianThreshold(in_image=nifti_file)\n", + "\n", + "outputs = wf()\n", + "\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [Containers and Environments](../explanation/environments.rst) for more details on\n", + "how to utilise containers and add support for other software environments." 
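Note that, per the guard added to `Workflow.add` in this same set of changes, environments can only be attached to shell tasks; passing an `environment` when adding a Python-defined task raises a `ValueError` ("Environments can only be used with 'shell' tasks ..."). A rough sketch of that failure mode, using the toy `Add` task from `pydra.tasks.testing`:

from pydra.design import workflow
from pydra.engine.environments import Docker
from pydra.tasks.testing import Add


@workflow.define
def Mistaken(a: float, b: float) -> float:
    # Add is a python task, not a shell task, so attaching an environment to it
    # is expected to raise a ValueError when the workflow is constructed
    added = workflow.add(Add(x=a, y=b), environment=Docker("ubuntu", tag="latest"))
    return added.out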
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] } ], "metadata": { diff --git a/new-docs/source/tutorial/7-canonical-form.ipynb b/new-docs/source/tutorial/7-canonical-form.ipynb index b220b3d5d9..0c8b3db30a 100644 --- a/new-docs/source/tutorial/7-canonical-form.ipynb +++ b/new-docs/source/tutorial/7-canonical-form.ipynb @@ -31,7 +31,13 @@ "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", "called `Outputs`. The function to be executed should be a staticmethod called `function`.\n", - "Default values can also be set directly, as with Attrs classes.\n" + "Default values can also be set directly, as with Attrs classes.\n", + "\n", + "In order to allow static type-checkers to check the type of outputs of tasks added\n", + "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", + "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", + "`define` method implicitly). Thus the \"canonical form\" of Python task definition is as\n", + "follows" ] }, { @@ -47,7 +53,7 @@ "\n", "\n", "@python.define\n", - "class CanonicalPythonDef:\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", " \"\"\"Canonical Python task definition class for testing\n", "\n", " Args:\n", @@ -59,7 +65,7 @@ " a: int\n", " b: float = 2.0 # set default value\n", "\n", - " class Outputs:\n", + " class Outputs(PythonOutputs):\n", " \"\"\"\n", " Args:\n", " c: Sum of a and b\n", @@ -95,7 +101,7 @@ "\n", "\n", "@python.define\n", - "class CanonicalPythonDef:\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", " \"\"\"Canonical Python task definition class for testing\n", "\n", " Args:\n", @@ -107,7 +113,7 @@ " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", "\n", - " class Outputs:\n", + " class Outputs(PythonOutputs):\n", " \"\"\"\n", " Args:\n", " c: Sum of a and b\n", @@ -125,51 +131,6 @@ "pprint(fields_dict(CanonicalPythonDef.Outputs))" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to allow static type-checkers to check the type of outputs of tasks added\n", - "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", - "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", - "`define` method implicitly). 
Thus the \"canonical\" is as follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "@python.define\n", - "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0 # set default value\n", - "\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 4fb2f78a8d..70f1c566e6 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -721,7 +721,12 @@ def clear_cache(cls): """Clear the cache of constructed workflows""" cls._constructed.clear() - def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: + def add( + self, + task_def: TaskDef[OutputsType], + name: str | None = None, + environment: Environment | None = None, + ) -> OutputsType: """Add a node to the workflow Parameters @@ -738,10 +743,17 @@ def add(self, task_spec: TaskDef[OutputsType], name=None) -> OutputsType: The outputs definition of the node """ if name is None: - name = type(task_spec).__name__ + name = type(task_def).__name__ if name in self._nodes: raise ValueError(f"Node with name {name!r} already exists in the workflow") - node = Node[OutputsType](name=name, definition=task_spec, workflow=self) + if environment and task_def._task_type != "shell": + raise ValueError( + "Environments can only be used with 'shell' tasks not " + f"{task_def._task_type!r} tasks ({task_def!r})" + ) + node = Node[OutputsType]( + name=name, definition=task_def, workflow=self, environment=environment + ) self._nodes[name] = node return node.lzout diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 8fd2e7ab47..7557278078 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -16,6 +16,7 @@ if ty.TYPE_CHECKING: from .core import Workflow + from .environments import Environment from pydra.engine.specs import TaskDef, TaskOutputs @@ -41,6 +42,7 @@ class Node(ty.Generic[OutputType]): name: str _definition: "TaskDef[OutputType]" + _environment: "Environment | None" = None _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False) _lzout: OutputType | None = attrs.field( init=False, default=None, eq=False, hash=False diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index be51f49db6..980c937102 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -541,13 +541,14 @@ def all_failed(self) -> bool: ) def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: - if self.node.state is None: + if not self.node.state: yield Task( definition=self.node._definition._resolve_lazy_inputs( workflow_inputs=self.workflow_inputs, graph=self.graph, ), submitter=self.submitter, + environment=self.node._environment, name=self.node.name, ) else: @@ -559,6 +560,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: state_index=index, ), submitter=self.submitter, + environment=self.node._environment, name=self.node.name, state_index=index, ) From 927d4a882895ecf2ae19b10625a2e6ddfa950c85 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Feb 2025 20:51:03 
+1100 Subject: [PATCH 176/342] made all outputs and defs slots classes --- pydra/engine/node.py | 4 +-- pydra/engine/specs.py | 8 ++++- pydra/utils/hash.py | 64 ++++++++++++++++++++++++++++------ pydra/utils/misc.py | 27 ++++++++++++++ pydra/utils/tests/test_hash.py | 15 +++++--- 5 files changed, 99 insertions(+), 19 deletions(-) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 7557278078..1782a5b877 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -43,9 +43,9 @@ class Node(ty.Generic[OutputType]): name: str _definition: "TaskDef[OutputType]" _environment: "Environment | None" = None - _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False) + _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False, repr=False) _lzout: OutputType | None = attrs.field( - init=False, default=None, eq=False, hash=False + init=False, default=None, eq=False, hash=False, repr=False ) _state: State | None = attrs.field(init=False, default=NOT_SET) _cont_dim: dict[str, int] | None = attrs.field( diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5fc522baef..817077045f 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -54,6 +54,7 @@ def is_set(value: ty.Any) -> bool: return value not in (attrs.NOTHING, EMPTY) +@attrs.define class TaskOutputs: """Base class for all output definitions""" @@ -592,6 +593,7 @@ class RuntimeSpec: network: bool = False +@attrs.define(kw_only=True, auto_attribs=False) class PythonOutputs(TaskOutputs): @classmethod @@ -620,6 +622,7 @@ def _from_task(cls, task: "Task[PythonDef]") -> Self: PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) +@attrs.define(kw_only=True, auto_attribs=False) class PythonDef(TaskDef[PythonOutputsType]): _task_type: str = "python" @@ -648,6 +651,7 @@ def _run(self, task: "Task[PythonDef]") -> None: ) +@attrs.define(kw_only=True, auto_attribs=False) class WorkflowOutputs(TaskOutputs): @classmethod @@ -701,7 +705,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) -@attrs.define(kw_only=True) +@attrs.define(kw_only=True, auto_attribs=False) class WorkflowDef(TaskDef[WorkflowOutputsType]): _task_type: str = "workflow" @@ -732,6 +736,7 @@ def construct(self) -> "Workflow": STDERR_HELP = """The standard error stream produced by the command.""" +@attrs.define(kw_only=True, auto_attribs=False) class ShellOutputs(TaskOutputs): """Output definition of a generic shell process.""" @@ -892,6 +897,7 @@ def _resolve_value( ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) +@attrs.define(kw_only=True, auto_attribs=False) class ShellDef(TaskDef[ShellOutputsType]): _task_type: str = "shell" diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 333127a047..8fc941d103 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -25,6 +25,7 @@ import attrs.exceptions from fileformats.core.fileset import FileSet, MockMixin from . 
import user_cache_dir, add_exc_note +from .misc import is_standard_library_type logger = logging.getLogger("pydra") @@ -458,29 +459,69 @@ def bytes_repr_dict(obj: dict, cache: Cache) -> Iterator[bytes]: @register_serializer(ty._SpecialForm) @register_serializer(type) def bytes_repr_type(klass: type, cache: Cache) -> Iterator[bytes]: - def type_name(tp): + from pydra.engine.helpers import list_fields + + def type_location(tp: type) -> bytes: + """Return the module and name of the type in a ASCII byte string""" try: - name = tp.__name__ + type_name = tp.__name__ except AttributeError: - name = tp._name - return name + type_name = tp._name + return f"{klass.__module__}.{type_name}".encode() yield b"type:(" origin = ty.get_origin(klass) - if origin: - yield f"{origin.__module__}.{type_name(origin)}[".encode() - for arg in ty.get_args(klass): + args = ty.get_args(klass) + if origin and args: + yield b"origin:(" + yield from bytes_repr_type(origin, cache) + yield b"),args:(" + for arg in args: if isinstance( arg, list ): # sometimes (e.g. Callable) the args of a type is a list - yield b"[" yield from (b for t in arg for b in bytes_repr_type(t, cache)) - yield b"]" else: yield from bytes_repr_type(arg, cache) - yield b"]" + yield b")" else: - yield f"{klass.__module__}.{type_name(klass)}".encode() + if is_standard_library_type(klass): + yield type_location(klass) + elif issubclass(klass, FileSet): + yield b"mime-like:(" + klass.mime_like.encode() + b")" + elif fields := list_fields(klass): + yield b"fields:(" + yield from bytes_repr_sequence_contents(fields, cache) + yield b")" + elif attrs.has(fields): + yield b"attrs:(" + yield from bytes_repr_sequence_contents(attrs.fields(klass), cache) + yield b")" + else: + try: + dct = { + n: v for n, v in klass.__dict__.items() if not n.startswith("__") + } + except AttributeError: + yield type_location(klass) + else: + yield b"dict:(" + yield from bytes_repr_mapping_contents(dct, cache) + yield b")" + # Include annotations + try: + annotations = klass.__annotations__ + except AttributeError: + pass + else: + yield b",annotations:(" + yield from bytes_repr_mapping_contents(annotations, cache) + yield b")" + yield b",mro:(" + yield from ( + b for t in klass.mro()[1:-1] for b in bytes_repr_type(t, cache) + ) + yield b")" yield b")" @@ -612,6 +653,7 @@ def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[byte yield from bytes_repr(key, cache) yield b"=" yield bytes(hash_single(mapping[key], cache)) + yield b"," def bytes_repr_sequence_contents(seq: Sequence, cache: Cache) -> Iterator[bytes]: diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 74b41e9826..7b9908e0ac 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -2,8 +2,10 @@ import re import ast import inspect +import sys import platformdirs import builtins +import pkgutil from pydra.engine._version import __version__ user_cache_dir = Path( @@ -143,3 +145,28 @@ def get_builtin_type_names(): A set of built-in object type names. 
""" return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) + + +def is_standard_library_type(obj): + """Check if a type is in the standard library.""" + module = inspect.getmodule(obj) + if module is None: + return False + return module.__name__ in STANDARD_LIBRARY_MODULES or module.__name__.startswith( + "builtins" + ) + + +def _standard_library_modules() -> frozenset[str]: + """List all standard library modules.""" + std_lib_modules = set(sys.builtin_module_names) + for _, modname, ispkg in pkgutil.iter_modules(): + if not ispkg: + std_lib_modules.add(modname) + return frozenset(std_lib_modules) + + +STANDARD_LIBRARY_MODULES: frozenset[str] = _standard_library_modules() + +# Example usage: +# print(list_standard_library_modules()) diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index 0730624f1b..19b7e1fd16 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -181,18 +181,23 @@ def test_bytes_repr_type1(): def test_bytes_repr_type1a(): obj_repr = join_bytes_repr(Zip[Json]) - assert obj_repr == rb"type:(fileformats.application.archive.Json__Zip)" + assert obj_repr == rb"type:(mime-like:(application/json+zip))" def test_bytes_repr_type2(): T = ty.TypeVar("T") class MyClass(ty.Generic[T]): - pass - obj_repr = join_bytes_repr(MyClass[int]).decode() - assert re.match( - r"type:\([\w\.]*test_hash.MyClass\[type:\(builtins.int\)\]\)", obj_repr + a: int + b: str + + def method(self, f: float) -> float: + return f + 1 + + assert join_bytes_repr(MyClass[int]) == ( + rb"type:(origin:(type:(dict:(),annotations:(),mro:(type:(typing.Generic))))," + rb"args:(type:(builtins.int)))" ) From 9c4d0077ed8f384467870a5f7da72867ff892ff2 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 13:55:39 +1100 Subject: [PATCH 177/342] fixed up typing errors and made all attributes of lazy fields "private" (i.e. 
prefixed by "_") --- pydra/design/tests/test_workflow.py | 31 ++++++----- pydra/engine/core.py | 41 ++++++++------- pydra/engine/lazy.py | 81 +++++++++++++++-------------- pydra/engine/node.py | 15 +++--- pydra/engine/specs.py | 12 ++--- pydra/utils/hash.py | 75 +++++++++++++------------- pydra/utils/misc.py | 10 ++-- pydra/utils/tests/test_hash.py | 2 +- pydra/utils/typing.py | 12 ++--- 9 files changed, 147 insertions(+), 132 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 2e3a8fda68..9f5f1277e0 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -65,7 +65,7 @@ def MyTestWorkflow(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + assert wf.outputs.out == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -122,7 +122,7 @@ def MyTestShellWorkflow( assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark assert wf.outputs.output_video == LazyOutField( - node=wf["resize"], field="out_video", type=video.Mp4, type_checked=True + _node=wf["resize"], _field="out_video", _type=video.Mp4, _type_checked=True ) assert list(wf.node_names) == ["add_watermark", "resize"] @@ -169,7 +169,7 @@ class Outputs(WorkflowOutputs): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + assert wf.outputs.out == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -219,10 +219,10 @@ def MyTestShellWorkflow( ) wf = Workflow.construct(workflow_spec) assert wf["add_watermark"].inputs.in_video == LazyInField( - workflow=wf, field="input_video", type=video.Mp4, type_checked=True + _workflow=wf, _field="input_video", _type=video.Mp4, _type_checked=True ) assert wf["add_watermark"].inputs.watermark == LazyInField( - workflow=wf, field="watermark", type=image.Png, type_checked=True + _workflow=wf, _field="watermark", _type=image.Png, _type_checked=True ) @@ -275,10 +275,10 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 assert wf.outputs.out1 == LazyOutField( - node=wf["Mul"], field="out", type=float, type_checked=True + _node=wf["Mul"], _field="out", _type=float, _type_checked=True ) assert wf.outputs.out2 == LazyOutField( - node=wf["division"], field="divided", type=ty.Any + _node=wf["division"], _field="divided", _type=ty.Any ) assert list(wf.node_names) == ["addition", "Mul", "division"] @@ -314,8 +314,8 @@ def MyTestWorkflow(a: int, b: float): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) - assert wf.outputs.out2 == LazyOutField(node=wf["Add"], field="out", type=ty.Any) + assert wf.outputs.out1 == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) + assert wf.outputs.out2 == LazyOutField(_node=wf["Add"], _field="out", _type=ty.Any) assert list(wf.node_names) == ["Add", "Mul"] @@ -339,7 +339,7 @@ def MyTestWorkflow(a: list[int], b: list[float]) -> list[float]: assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] assert wf["Mul"].combiner == ["Mul.x"] assert 
wf.outputs.out == LazyOutField( - node=wf["Sum"], field="out", type=list[float], type_checked=True + _node=wf["Sum"], _field="out", _type=list[float], _type_checked=True ) @@ -366,7 +366,7 @@ def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: assert wf["Add"].splitter == "_Mul" assert wf["Add"].combiner == ["Mul.x"] assert wf.outputs.out == LazyOutField( - node=wf["Sum"], field="out", type=list[float], type_checked=True + _node=wf["Sum"], _field="out", _type=list[float], _type_checked=True ) @@ -406,7 +406,7 @@ def MyTestWorkflow(a: int, b: float, c: float) -> float: assert wf.inputs.b == 10.0 assert wf.inputs.c == 2.0 assert wf.outputs.out == LazyOutField( - node=wf["NestedWorkflow"], field="out", type=float, type_checked=True + _node=wf["NestedWorkflow"], _field="out", _type=float, _type_checked=True ) assert list(wf.node_names) == ["Divide", "NestedWorkflow"] nwf_spec = copy(wf["NestedWorkflow"]._definition) @@ -415,7 +415,7 @@ def MyTestWorkflow(a: int, b: float, c: float) -> float: nwf.inputs.a == 100.0 nwf.inputs.b == 10.0 nwf.inputs.c == 2.0 - nwf.outputs.out == LazyOutField(node=nwf["Add"], field="out", type=float) + nwf.outputs.out == LazyOutField(_node=nwf["Add"], _field="out", _type=float) assert list(nwf.node_names) == ["Power", "Add"] @@ -447,5 +447,8 @@ def RecursiveNestedWorkflow(a: float, depth: int) -> float: assert wf.inputs.a == 1 assert wf.inputs.depth == 3 assert wf.outputs.out == LazyOutField( - node=wf["RecursiveNestedWorkflow"], field="out", type=float, type_checked=True + _node=wf["RecursiveNestedWorkflow"], + _field="out", + _type=float, + _type_checked=True, ) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 70f1c566e6..27eaac77e4 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -607,6 +607,9 @@ class Workflow(ty.Generic[WorkflowOutputsType]): outputs: WorkflowOutputsType = attrs.field() _nodes: dict[str, Node] = attrs.field(factory=dict) + def __repr__(self): + return f"Workflow(name={self.name!r}, defn={self.inputs!r})" + @classmethod def construct( cls, @@ -655,9 +658,9 @@ def construct( lazy_spec, lzy_inpt.name, LazyInField( - workflow=workflow, - field=lzy_inpt.name, - type=lzy_inpt.type, + _workflow=workflow, + _field=lzy_inpt.name, + _type=lzy_inpt.type, ), ) @@ -681,7 +684,7 @@ def construct( ) for outpt, outpt_lf in zip(output_fields, output_lazy_fields): # Automatically combine any uncombined state arrays into lists - if TypeParser.get_origin(outpt_lf.type) is StateArray: + if TypeParser.get_origin(outpt_lf._type) is StateArray: outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] setattr(outputs, outpt.name, outpt_lf) else: @@ -819,19 +822,19 @@ def _create_graph( lf = node._definition[field.name] if isinstance(lf, LazyOutField): # adding an edge to the graph if task id expecting output from a different task - if lf.name != self.name: + if lf._node.name != self.name: # checking if the connection is already in the graph - if (graph.node(lf.name), node) not in graph.edges: - graph.add_edges((graph.node(lf.name), node)) + if (graph.node(lf._node.name), node) not in graph.edges: + graph.add_edges((graph.node(lf._node.name), node)) if detailed: graph.add_edges_description( - (node.name, field.name, lf.name, lf.field) + (node.name, field.name, lf._node.name, lf._field) ) - logger.debug("Connecting %s to %s", lf.name, node.name) + logger.debug("Connecting %s to %s", lf._node.name, node.name) # adding a state from the previous task to other_states if ( - graph.node(lf.name).state - and 
graph.node(lf.name).state.splitter_rpn_final + graph.node(lf._node.name).state + and graph.node(lf._node.name).state.splitter_rpn_final ): # variables that are part of inner splitters should be # treated as a containers @@ -841,20 +844,20 @@ def _create_graph( ): node._inner_cont_dim[f"{node.name}.{field.name}"] = 1 # adding task_name: (task.state, [a field from the connection] - if lf.name not in other_states: - other_states[lf.name] = ( - graph.node(lf.name).state, + if lf._node.name not in other_states: + other_states[lf._node.name] = ( + graph.node(lf._node.name).state, [field.name], ) else: # if the task already exist in other_state, # additional field name should be added to the list of fields - other_states[lf.name][1].append(field.name) + other_states[lf._node.name][1].append(field.name) else: # LazyField with the wf input # connections with wf input should be added to the detailed graph description if detailed: graph.add_edges_description( - (node.name, field.name, lf.name, lf.field) + (node.name, field.name, lf._node.name, lf._field) ) # if task has connections state has to be recalculated @@ -882,7 +885,7 @@ def create_dotfile(self, type="simple", export=None, name=None, output_dir=None) outdir = output_dir if output_dir is not None else self.cache_dir graph = self.graph if not name: - name = f"graph_{self.name}" + name = f"graph_{self._node.name}" if type == "simple": for task in graph.nodes: self.create_connections(task) @@ -897,7 +900,9 @@ def create_dotfile(self, type="simple", export=None, name=None, output_dir=None) self.create_connections(task, detailed=True) # adding wf outputs for wf_out, lf in self._connections: - graph.add_edges_description((self.name, wf_out, lf.name, lf.field)) + graph.add_edges_description( + (self._node.name, wf_out, lf._node.name, lf.field) + ) dotfile = graph.create_dotfile_detailed(outdir=outdir, name=name) else: raise Exception( diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 37ab1f00f5..f9b416327e 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -24,17 +24,17 @@ class LazyField(ty.Generic[T], metaclass=abc.ABCMeta): """Lazy fields implement promises.""" - field: str - type: TypeOrAny - cast_from: ty.Optional[ty.Type[ty.Any]] = None - type_checked: bool = False + _field: str + _type: TypeOrAny + _cast_from: ty.Optional[ty.Type[ty.Any]] = None + _type_checked: bool = False def __bytes_repr__(self, cache): yield type(self).__name__.encode() + b"(" yield from bytes(hash_single(self.source, cache)) - yield b"field=" + self.field.encode() - yield b"type=" + bytes(hash_single(self.type, cache)) - yield b"cast_from=" + bytes(hash_single(self.cast_from, cache)) + yield b"field=" + self._field.encode() + yield b"type=" + bytes(hash_single(self._type, cache)) + yield b"cast_from=" + bytes(hash_single(self._cast_from, cache)) yield b")" def _apply_cast(self, value): @@ -42,31 +42,34 @@ def _apply_cast(self, value): the lazy-field""" from pydra.utils.typing import TypeParser - if self.cast_from: - assert TypeParser.matches(value, self.cast_from) - value = self.type(value) + if self._cast_from: + assert TypeParser.matches(value, self._cast_from) + value = self._type(value) return value @attrs.define(kw_only=True) class LazyInField(LazyField[T]): - workflow: "Workflow" = attrs.field() + _workflow: "Workflow" = attrs.field() - attr_type = "input" + _attr_type = "input" def __eq__(self, other): return ( isinstance(other, LazyInField) - and self.field == other.field - and self.type == other.type + and self._field == other._field + 
and self._type == other._type ) + def __repr__(self): + return f"{type(self).__name__}(field={self._field!r}, type={self._type})" + @property - def source(self): - return self.workflow + def _source(self): + return self._workflow - def get_value( + def _get_value( self, workflow_def: "WorkflowDef", ) -> ty.Any: @@ -84,7 +87,7 @@ def get_value( value : Any the resolved value of the lazy-field """ - value = workflow_def[self.field] + value = workflow_def[self._field] value = self._apply_cast(value) return value @@ -102,24 +105,26 @@ def cast(self, new_type: TypeOrAny) -> Self: a copy of the lazy field with the new type """ return type(self)[new_type]( - workflow=self.workflow, - field=self.field, - type=new_type, - cast_from=self.cast_from if self.cast_from else self.type, + _workflow=self._workflow, + _field=self._field, + _type=new_type, + _cast_from=self._cast_from if self._cast_from else self._type, ) @attrs.define(kw_only=True) class LazyOutField(LazyField[T]): - node: node.Node - attr_type = "output" + _node: node.Node + _attr_type = "output" - @property - def name(self) -> str: - return self.node.name + def __repr__(self): + return ( + f"{type(self).__name__}(node={self._node.name!r}, " + f"field={self._field!r}, type={self._type})" + ) - def get_value( + def _get_value( self, graph: "DiGraph[NodeExecution]", state_index: "StateIndex | None" = None, @@ -146,18 +151,18 @@ def get_value( if state_index is None: state_index = StateIndex() - task = graph.node(self.node.name).task(state_index) - _, split_depth = TypeParser.strip_splits(self.type) + task = graph.node(self._node.name).task(state_index) + _, split_depth = TypeParser.strip_splits(self._type) def get_nested(task: "Task[DefType]", depth: int): if isinstance(task, StateArray): val = [get_nested(task=t, depth=depth - 1) for t in task] if depth: - val = StateArray[self.type](val) + val = StateArray[self._type](val) else: if task.errored: raise ValueError( - f"Cannot retrieve value for {self.field} from {self.name} as " + f"Cannot retrieve value for {self._field} from {self._node.name} as " "the node errored" ) res = task.result() @@ -180,7 +185,7 @@ def get_nested(task: "Task[DefType]", depth: int): "`__bytes_repr__()` dunder methods to handle one or more types in " "your interface inputs." ) - val = res.get_output_field(self.field) + val = res.get_output_field(self._field) val = self._apply_cast(val) return val @@ -188,8 +193,8 @@ def get_nested(task: "Task[DefType]", depth: int): return value @property - def source(self): - return self.node + def _source(self): + return self._node def cast(self, new_type: TypeOrAny) -> Self: """ "casts" the lazy field to a new type @@ -205,8 +210,8 @@ def cast(self, new_type: TypeOrAny) -> Self: a copy of the lazy field with the new type """ return type(self)[new_type]( - node=self.node, - field=self.field, + node=self._node, + field=self._field, type=new_type, - cast_from=self.cast_from if self.cast_from else self.type, + cast_from=self._cast_from if self._cast_from else self._type, ) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 1782a5b877..cd238d6866 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -122,9 +122,10 @@ def lzout(self) -> OutputType: outputs = self.inputs.Outputs(**lazy_fields) # Flag the output lazy fields as being not typed checked (i.e. 
assigned to another # node's inputs) yet + outpt: lazy.LazyOutField for outpt in attrs_values(outputs).values(): - outpt.type_checked = False - outputs._node = self + outpt._type_checked = False + outpt._node = self self._lzout = outputs self._wrap_lzout_types_in_state_arrays() return outputs @@ -223,11 +224,11 @@ def _wrap_lzout_types_in_state_arrays(self) -> None: return outpt_lf: lazy.LazyOutField for outpt_lf in attrs_values(self.lzout).values(): - assert not outpt_lf.type_checked - type_, _ = TypeParser.strip_splits(outpt_lf.type) + assert not outpt_lf._type_checked + type_, _ = TypeParser.strip_splits(outpt_lf._type) for _ in range(self._state.depth): type_ = StateArray[type_] - outpt_lf.type = type_ + outpt_lf._type = type_ def _set_state(self) -> None: # Add node name to state's splitter, combiner and cont_dim loaded from the def @@ -267,8 +268,8 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: """Get the states of the upstream nodes that are connected to this node""" upstream_states = {} for inpt_name, val in self.input_values: - if isinstance(val, lazy.LazyOutField) and val.node.state: - node: Node = val.node + if isinstance(val, lazy.LazyOutField) and val._node.state: + node: Node = val._node # variables that are part of inner splitters should be treated as a containers if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: node._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 817077045f..aa836d062e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -422,9 +422,9 @@ def _resolve_lazy_inputs( resolved = {} for name, value in attrs_values(self).items(): if isinstance(value, LazyInField): - resolved[name] = value.get_value(workflow_inputs) + resolved[name] = value._get_value(workflow_inputs) elif isinstance(value, LazyOutField): - resolved[name] = value.get_value(graph, state_index) + resolved[name] = value._get_value(graph, state_index) return attrs.evolve(self, **resolved) def _check_rules(self): @@ -677,11 +677,11 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: nodes_dict = {n.name: n for n in exec_graph.nodes} for name, lazy_field in attrs_values(workflow.outputs).items(): try: - val_out = lazy_field.get_value(exec_graph) + val_out = lazy_field._get_value(exec_graph) output_wf[name] = val_out except (ValueError, AttributeError): output_wf[name] = None - node: "NodeExecution" = nodes_dict[lazy_field.name] + node: "NodeExecution" = nodes_dict[lazy_field._node.name] # checking if the tasks has predecessors that raises error if isinstance(node.errored, list): raise ValueError(f"Tasks {node._errored} raised an error") @@ -691,7 +691,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: if not err_files: raise raise ValueError( - f"Task {lazy_field.name} raised an error, full crash report is " + f"Task {lazy_field._node.name} raised an error, full crash report is " f"here: " + ( str(err_files[0]) @@ -712,7 +712,7 @@ class WorkflowDef(TaskDef[WorkflowOutputsType]): RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("construct",) - _constructed = attrs.field(default=None, init=False) + _constructed = attrs.field(default=None, init=False, repr=False, eq=False) def _run(self, task: "Task[WorkflowDef]") -> None: """Run the workflow.""" diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 8fc941d103..41e753b2f9 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -24,8 +24,9 @@ from filelock import SoftFileLock import attrs.exceptions 
from fileformats.core.fileset import FileSet, MockMixin +import fileformats.core.exceptions from . import user_cache_dir, add_exc_note -from .misc import is_standard_library_type +from .misc import in_stdlib logger = logging.getLogger("pydra") @@ -485,18 +486,17 @@ def type_location(tp: type) -> bytes: yield from bytes_repr_type(arg, cache) yield b")" else: - if is_standard_library_type(klass): + if in_stdlib(klass): yield type_location(klass) elif issubclass(klass, FileSet): - yield b"mime-like:(" + klass.mime_like.encode() + b")" + try: + yield b"mime-like:(" + klass.mime_like.encode() + b")" + except fileformats.core.exceptions.FormatDefinitionError: + yield type_location(klass) elif fields := list_fields(klass): yield b"fields:(" yield from bytes_repr_sequence_contents(fields, cache) yield b")" - elif attrs.has(fields): - yield b"attrs:(" - yield from bytes_repr_sequence_contents(attrs.fields(klass), cache) - yield b")" else: try: dct = { @@ -603,37 +603,40 @@ def bytes_repr_function(obj: types.FunctionType, cache: Cache) -> Iterator[bytes """Serialize a function, attempting to use the AST of the source code if available otherwise falling back to the byte-code of the function.""" yield b"function:(" - try: - src = inspect.getsource(obj) - except OSError: - # Fallback to using the bytes representation of the code object - yield from bytes_repr(obj.__code__, cache) + if in_stdlib(obj): + yield f"{obj.__module__}.{obj.__name__}".encode() else: + try: + src = inspect.getsource(obj) + except OSError: + # Fallback to using the bytes representation of the code object + yield from bytes_repr(obj.__code__, cache) + else: - def dump_ast(node: ast.AST) -> bytes: - return ast.dump( - node, annotate_fields=False, include_attributes=False - ).encode() - - def strip_annotations(node: ast.AST): - """Remove annotations from function arguments.""" - for arg in node.args.args: - arg.annotation = None - for arg in node.args.kwonlyargs: - arg.annotation = None - if node.args.vararg: - node.args.vararg.annotation = None - if node.args.kwarg: - node.args.kwarg.annotation = None - - indent = re.match(r"(\s*)", src).group(1) - if indent: - src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) - func_ast = ast.parse(src).body[0] - strip_annotations(func_ast) - yield dump_ast(func_ast.args) - for stmt in func_ast.body: - yield dump_ast(stmt) + def dump_ast(node: ast.AST) -> bytes: + return ast.dump( + node, annotate_fields=False, include_attributes=False + ).encode() + + def strip_annotations(node: ast.AST): + """Remove annotations from function arguments.""" + for arg in node.args.args: + arg.annotation = None + for arg in node.args.kwonlyargs: + arg.annotation = None + if node.args.vararg: + node.args.vararg.annotation = None + if node.args.kwarg: + node.args.kwarg.annotation = None + + indent = re.match(r"(\s*)", src).group(1) + if indent: + src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) + func_ast = ast.parse(src).body[0] + strip_annotations(func_ast) + yield dump_ast(func_ast.args) + for stmt in func_ast.body: + yield dump_ast(stmt) yield b")" diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index 7b9908e0ac..fc0ca4ca30 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -147,17 +147,15 @@ def get_builtin_type_names(): return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) -def is_standard_library_type(obj): +def in_stdlib(obj): """Check if a type is in the standard library.""" module = inspect.getmodule(obj) if module is None: return False - return 
module.__name__ in STANDARD_LIBRARY_MODULES or module.__name__.startswith( - "builtins" - ) + return module.__name__ in STDLIB_MODULES or module.__name__.startswith("builtins") -def _standard_library_modules() -> frozenset[str]: +def _stdlib_modules() -> frozenset[str]: """List all standard library modules.""" std_lib_modules = set(sys.builtin_module_names) for _, modname, ispkg in pkgutil.iter_modules(): @@ -166,7 +164,7 @@ def _standard_library_modules() -> frozenset[str]: return frozenset(std_lib_modules) -STANDARD_LIBRARY_MODULES: frozenset[str] = _standard_library_modules() +STDLIB_MODULES: frozenset[str] = _stdlib_modules() # Example usage: # print(list_standard_library_modules()) diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index 19b7e1fd16..f5171f0ec8 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -50,7 +50,7 @@ def test_bytes_repr_builtins(): assert complex_repr == b"complex:" + bytes(16) # Dicts are sorted by key, and values are hashed dict_repr = join_bytes_repr({"b": "c", "a": 0}) - assert re.match(rb"dict:{str:1:a=.{16}str:1:b=.{16}}$", dict_repr) + assert re.match(rb"dict:{str:1:a=.{16},str:1:b=.{16},}$", dict_repr) # Lists and tuples concatenate hashes of their contents list_repr = join_bytes_repr([1, 2, 3]) assert re.match(rb"list:\(.{48}\)$", list_repr) diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 5698a7edac..158dade6f0 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -221,19 +221,19 @@ def __call__(self, obj: ty.Any) -> T: coerced = attr.NOTHING # type: ignore[assignment] elif is_lazy(obj): try: - self.check_type(obj.type) + self.check_type(obj._type) except TypeError as e: if self.superclass_auto_cast: try: # Check whether the type of the lazy field isn't a superclass of # the type to check against, and if so, allow it due to permissive # typing rules. 
- TypeParser(obj.type, match_any_of_union=True).check_type( + TypeParser(obj._type, match_any_of_union=True).check_type( self.tp ) except TypeError: raise TypeError( - f"Incorrect type for lazy field{self.label_str}: {obj.type!r} " + f"Incorrect type for lazy field{self.label_str}: {obj._type!r} " f"is not a subclass or superclass of {self.tp} (and will not " "be able to be coerced to one that is)" ) from e @@ -247,14 +247,14 @@ def __call__(self, obj: ty.Any) -> T: ) else: raise TypeError( - f"Incorrect type for lazy field{self.label_str}: {obj.type!r} " + f"Incorrect type for lazy field{self.label_str}: {obj._type!r} " f"is not a subclass of {self.tp} (and will not be able to be " "coerced to one that is)" ) from e coerced = obj # type: ignore - if obj.type is not ty.Any: + if obj._type is not ty.Any: # Used to check whether the type of the field can be changed - obj.type_checked = True + obj._type_checked = True elif isinstance(obj, StateArray): coerced = StateArray(self(o) for o in obj) # type: ignore[assignment] else: From 8415d8031220d3b28e324def03090fb38d858ba9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 17:59:10 +1100 Subject: [PATCH 178/342] fixed up workflow caching and type hashing --- pydra/design/tests/test_workflow.py | 73 +++++++++++++------ pydra/design/workflow.py | 29 +++++++- pydra/engine/core.py | 104 ++++++++++++++++----------- pydra/engine/lazy.py | 41 ----------- pydra/engine/specs.py | 16 ++--- pydra/engine/tests/test_functions.py | 13 +--- pydra/engine/tests/test_specs.py | 6 +- pydra/utils/hash.py | 16 +++-- pydra/utils/misc.py | 9 ++- pydra/utils/tests/test_hash.py | 50 ++++++++----- pydra/utils/tests/test_typing.py | 4 +- 11 files changed, 202 insertions(+), 159 deletions(-) diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 9f5f1277e0..4e49f0e7dd 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -1,6 +1,6 @@ from operator import attrgetter from copy import copy -import pytest +from unittest.mock import Mock import attrs from pydra.engine.lazy import LazyInField, LazyOutField import typing as ty @@ -8,6 +8,7 @@ from pydra.engine.helpers import list_fields from pydra.engine.specs import WorkflowDef, WorkflowOutputs from pydra.engine.core import Workflow +from pydra.utils.hash import hash_function from fileformats import video, image # NB: We use PascalCase for interfaces and workflow functions as it is translated into a class @@ -65,7 +66,7 @@ def MyTestWorkflow(a, b): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) + assert wf.outputs.out == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -122,7 +123,7 @@ def MyTestShellWorkflow( assert wf.inputs.input_video == input_video assert wf.inputs.watermark == watermark assert wf.outputs.output_video == LazyOutField( - _node=wf["resize"], _field="out_video", _type=video.Mp4, _type_checked=True + node=wf["resize"], field="out_video", type=video.Mp4, type_checked=True ) assert list(wf.node_names) == ["add_watermark", "resize"] @@ -169,7 +170,7 @@ class Outputs(WorkflowOutputs): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) + assert wf.outputs.out == 
LazyOutField(node=wf["Mul"], field="out", type=ty.Any) # Nodes are named after the specs by default assert list(wf.node_names) == ["Add", "Mul"] @@ -177,7 +178,7 @@ class Outputs(WorkflowOutputs): def test_workflow_lazy(): - @workflow.define(lazy=["input_video", "watermark"]) + @workflow.define def MyTestShellWorkflow( input_video: video.Mp4, watermark: image.Png, @@ -211,20 +212,46 @@ def MyTestShellWorkflow( return output_video # test implicit detection of output name - input_video = video.Mp4.mock("input.mp4") - watermark = image.Png.mock("watermark.png") + # input_video = video.Mp4.mock("input.mp4") + # watermark = image.Png.mock("watermark.png") + mock_node = Mock() + mock_node.name = "mock_node" workflow_spec = MyTestShellWorkflow( - input_video=input_video, - watermark=watermark, + input_video=LazyOutField(node=mock_node, field="a_video", type=video.Mp4), + watermark=LazyOutField(node=mock_node, field="a_watermark", type=image.Png), ) + Workflow.clear_cache(definition=MyTestShellWorkflow) wf = Workflow.construct(workflow_spec) assert wf["add_watermark"].inputs.in_video == LazyInField( - _workflow=wf, _field="input_video", _type=video.Mp4, _type_checked=True + workflow=wf, field="input_video", type=video.Mp4, type_checked=True ) assert wf["add_watermark"].inputs.watermark == LazyInField( - _workflow=wf, _field="watermark", _type=image.Png, _type_checked=True + workflow=wf, field="watermark", type=image.Png, type_checked=True ) + # Check to see that the cache is populated with the new workflow + workflow_cache = Workflow._constructed_cache[hash_function(MyTestShellWorkflow)] + # The non-lazy keys used to construct the workflow + key_set = frozenset(["watermark_dims", "constructor"]) + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 1 + + # check to see that the cache is not used if we change the value of one of the + # non lazy fields + workflow_spec.watermark_dims = (20, 20) + wf2 = Workflow.construct(workflow_spec) + assert wf2 is not wf + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 2 + + # check to see that the cache is used if we provide a concrete value for one of the + # lazy fields + workflow_spec.input_video = video.Mp4.mock("input.mp4") + wf3 = Workflow.construct(workflow_spec) + assert wf3 is wf2 + assert list(workflow_cache) == [key_set] + assert len(workflow_cache[key_set]) == 2 + def test_direct_access_of_workflow_object(): @@ -275,10 +302,10 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, float]: assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 assert wf.outputs.out1 == LazyOutField( - _node=wf["Mul"], _field="out", _type=float, _type_checked=True + node=wf["Mul"], field="out", type=float, type_checked=True ) assert wf.outputs.out2 == LazyOutField( - _node=wf["division"], _field="divided", _type=ty.Any + node=wf["division"], field="divided", type=ty.Any ) assert list(wf.node_names) == ["addition", "Mul", "division"] @@ -314,8 +341,8 @@ def MyTestWorkflow(a: int, b: float): wf = Workflow.construct(workflow_spec) assert wf.inputs.a == 1 assert wf.inputs.b == 2.0 - assert wf.outputs.out1 == LazyOutField(_node=wf["Mul"], _field="out", _type=ty.Any) - assert wf.outputs.out2 == LazyOutField(_node=wf["Add"], _field="out", _type=ty.Any) + assert wf.outputs.out1 == LazyOutField(node=wf["Mul"], field="out", type=ty.Any) + assert wf.outputs.out2 == LazyOutField(node=wf["Add"], field="out", type=ty.Any) assert list(wf.node_names) == ["Add", "Mul"] @@ -339,7 +366,7 @@ def MyTestWorkflow(a: list[int], b: 
list[float]) -> list[float]: assert wf["Mul"].splitter == ["Mul.x", "Mul.y"] assert wf["Mul"].combiner == ["Mul.x"] assert wf.outputs.out == LazyOutField( - _node=wf["Sum"], _field="out", _type=list[float], _type_checked=True + node=wf["Sum"], field="out", type=list[float], type_checked=True ) @@ -366,7 +393,7 @@ def MyTestWorkflow(a: list[int], b: list[float], c: float) -> list[float]: assert wf["Add"].splitter == "_Mul" assert wf["Add"].combiner == ["Mul.x"] assert wf.outputs.out == LazyOutField( - _node=wf["Sum"], _field="out", _type=list[float], _type_checked=True + node=wf["Sum"], field="out", type=list[float], type_checked=True ) @@ -406,7 +433,7 @@ def MyTestWorkflow(a: int, b: float, c: float) -> float: assert wf.inputs.b == 10.0 assert wf.inputs.c == 2.0 assert wf.outputs.out == LazyOutField( - _node=wf["NestedWorkflow"], _field="out", _type=float, _type_checked=True + node=wf["NestedWorkflow"], field="out", type=float, type_checked=True ) assert list(wf.node_names) == ["Divide", "NestedWorkflow"] nwf_spec = copy(wf["NestedWorkflow"]._definition) @@ -415,7 +442,7 @@ def MyTestWorkflow(a: int, b: float, c: float) -> float: nwf.inputs.a == 100.0 nwf.inputs.b == 10.0 nwf.inputs.c == 2.0 - nwf.outputs.out == LazyOutField(_node=nwf["Add"], _field="out", _type=float) + nwf.outputs.out == LazyOutField(node=nwf["Add"], field="out", type=float) assert list(nwf.node_names) == ["Power", "Add"] @@ -447,8 +474,8 @@ def RecursiveNestedWorkflow(a: float, depth: int) -> float: assert wf.inputs.a == 1 assert wf.inputs.depth == 3 assert wf.outputs.out == LazyOutField( - _node=wf["RecursiveNestedWorkflow"], - _field="out", - _type=float, - _type_checked=True, + node=wf["RecursiveNestedWorkflow"], + field="out", + type=float, + type_checked=True, ) diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 020cad7622..7043da1ca3 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -58,7 +58,7 @@ class arg(Arg): through to the tasks, by default it is False """ - lazy: bool = False + pass @attrs.define @@ -223,3 +223,30 @@ def add(task_def: "TaskDef[OutputsType]", name: str = None) -> OutputsType: The outputs definition of the node """ return this().add(task_def, name=name) + + +U = ty.TypeVar("U") + + +def cast(field: ty.Any, new_type: type[U]) -> U: + """Cast a lazy field to a new type. Note that the typing in the signature is a white + lie, as the return field is actually a LazyField as placeholder for the object of + type U. 
+ + Parameters + ---------- + field : LazyField[T] + The field to cast + new_type : type[U] + The new type to cast the field to + + Returns + ------- + LazyField[U] + A copy of the lazy field with the new type + """ + return attrs.evolve( + field, + type=new_type, + cast_from=field._cast_from if field._cast_from else field._type, + ) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 27eaac77e4..331b54d837 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -13,7 +13,7 @@ import attr import cloudpickle as cp from copy import copy -from operator import itemgetter +from collections import defaultdict from typing import Self import attrs from filelock import SoftFileLock @@ -21,7 +21,7 @@ from pydra.engine.graph import DiGraph from pydra.engine import state from .lazy import LazyInField, LazyOutField -from pydra.utils.hash import hash_function +from pydra.utils.hash import hash_function, Cache from pydra.utils.typing import TypeParser, StateArray from .node import Node from datetime import datetime @@ -610,6 +610,16 @@ class Workflow(ty.Generic[WorkflowOutputsType]): def __repr__(self): return f"Workflow(name={self.name!r}, defn={self.inputs!r})" + @classmethod + def clear_cache( + cls, definition: WorkflowDef[WorkflowOutputsType] | None = None + ) -> None: + """Clear the cache of constructed workflows""" + if definition is None: + cls._constructed_cache = defaultdict(lambda: defaultdict(dict)) + else: + cls._constructed_cache[hash_function(definition)] = defaultdict(dict) + @classmethod def construct( cls, @@ -617,29 +627,37 @@ def construct( ) -> Self: """Construct a workflow from a definition, caching the constructed worklow""" - lazy_inputs = [f for f in list_fields(type(definition)) if f.lazy] - - # Create a cache key by hashing all the non-lazy input values in the definition - # and use this to store the constructed workflow in case it is reused or nested - # and split over within another workflow - lazy_input_names = {f.name for f in lazy_inputs} - non_lazy_vals = tuple( - sorted( - ( - i - for i in attrs_values(definition).items() - if i[0] not in lazy_input_names - ), - key=itemgetter(0), - ) - ) - if lazy_non_lazy_vals := [f for f in non_lazy_vals if is_lazy(f[1])]: - raise ValueError( - f"Lazy input fields {lazy_non_lazy_vals} found in non-lazy fields " - ) - hash_key = hash_function(non_lazy_vals) - if hash_key in cls._constructed: - return cls._constructed[hash_key] + # Check the previously constructed workflows to see if a workflow has been + # constructed for the given set of inputs, or a less-specific set (i.e. 
with a + # super-set of lazy inputs), and use that if it exists + + non_lazy_vals = { + n: v for n, v in attrs_values(definition).items() if not is_lazy(v) + } + non_lazy_keys = frozenset(non_lazy_vals) + hash_cache = Cache() # share the hash cache to avoid recalculations + non_lazy_hash = hash_function(non_lazy_vals, cache=hash_cache) + defn_hash = hash_function(type(definition), cache=hash_cache) + # Check for same non-lazy inputs + try: + defn_cache = cls._constructed_cache[defn_hash] + except KeyError: + pass + else: + if ( + non_lazy_keys in defn_cache + and non_lazy_hash in defn_cache[non_lazy_keys] + ): + return defn_cache[non_lazy_keys][non_lazy_hash] + # Check for supersets of lazy inputs + for key_set, key_set_cache in defn_cache.items(): + if key_set.issubset(non_lazy_keys): + subset_vals = { + k: v for k, v in non_lazy_vals.items() if k in key_set + } + subset_hash = hash_function(subset_vals, cache=hash_cache) + if subset_hash in key_set_cache: + return key_set_cache[subset_hash] # Initialise the outputs of the workflow outputs = definition.Outputs( @@ -653,16 +671,19 @@ def construct( inputs=lazy_spec, outputs=outputs, ) - for lzy_inpt in lazy_inputs: - setattr( - lazy_spec, - lzy_inpt.name, - LazyInField( - _workflow=workflow, - _field=lzy_inpt.name, - _type=lzy_inpt.type, - ), - ) + # Set lazy inputs to the workflow, need to do it after the workflow is initialised + # so a back ref to the workflow can be set in the lazy field + for field in list_fields(definition): + if field.name not in non_lazy_keys: + setattr( + lazy_spec, + field.name, + LazyInField( + workflow=workflow, + field=field.name, + type=field.type, + ), + ) input_values = attrs_values(lazy_spec) constructor = input_values.pop("constructor") @@ -685,7 +706,7 @@ def construct( for outpt, outpt_lf in zip(output_fields, output_lazy_fields): # Automatically combine any uncombined state arrays into lists if TypeParser.get_origin(outpt_lf._type) is StateArray: - outpt_lf.type = list[TypeParser.strip_splits(outpt_lf.type)[0]] + outpt_lf._type = list[TypeParser.strip_splits(outpt_lf._type)[0]] setattr(outputs, outpt.name, outpt_lf) else: if unset_outputs := [ @@ -696,7 +717,7 @@ def construct( f"constructor of {workflow!r}" ) - cls._constructed[hash_key] = workflow + cls._constructed_cache[defn_hash][non_lazy_keys][non_lazy_hash] = workflow return workflow @@ -719,11 +740,6 @@ def under_construction(cls) -> "Workflow[ty.Any]": "`Workflow.construct` in the current call stack" ) - @classmethod - def clear_cache(cls): - """Clear the cache of constructed workflows""" - cls._constructed.clear() - def add( self, task_def: TaskDef[OutputsType], @@ -772,7 +788,9 @@ def node_names(self) -> list[str]: return list(self._nodes) # Used to cache the constructed workflows by their hashed input values - _constructed: dict[int, "Workflow[ty.Any]"] = {} + _constructed_cache: dict[ + str, dict[frozenset[str], dict[str, "Workflow[ty.Any]"]] + ] = defaultdict(lambda: defaultdict(dict)) def execution_graph(self, submitter: "Submitter") -> DiGraph: from pydra.engine.submitter import NodeExecution diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index f9b416327e..f668acfc1b 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -1,6 +1,5 @@ import typing as ty import abc -from typing import Self import attrs from pydra.utils.typing import StateArray from pydra.utils.hash import hash_single @@ -91,26 +90,6 @@ def _get_value( value = self._apply_cast(value) return value - def cast(self, new_type: TypeOrAny) -> Self: - """ 
"casts" the lazy field to a new type - - Parameters - ---------- - new_type : type - the type to cast the lazy-field to - - Returns - ------- - cast_field : LazyInField - a copy of the lazy field with the new type - """ - return type(self)[new_type]( - _workflow=self._workflow, - _field=self._field, - _type=new_type, - _cast_from=self._cast_from if self._cast_from else self._type, - ) - @attrs.define(kw_only=True) class LazyOutField(LazyField[T]): @@ -195,23 +174,3 @@ def get_nested(task: "Task[DefType]", depth: int): @property def _source(self): return self._node - - def cast(self, new_type: TypeOrAny) -> Self: - """ "casts" the lazy field to a new type - - Parameters - ---------- - new_type : type - the type to cast the lazy-field to - - Returns - ------- - cast_field : LazyOutField - a copy of the lazy field with the new type - """ - return type(self)[new_type]( - node=self._node, - field=self._field, - type=new_type, - cast_from=self._cast_from if self._cast_from else self._type, - ) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index aa836d062e..9e86a9abc1 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -121,8 +121,6 @@ def __getitem__(self, name_or_index: str | int) -> ty.Any: class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" - _task_type: str - # The following fields are used to store split/combine state information _splitter = attrs.field(default=None, init=False, repr=False) _combiner = attrs.field(default=None, init=False, repr=False) @@ -625,7 +623,7 @@ def _from_task(cls, task: "Task[PythonDef]") -> Self: @attrs.define(kw_only=True, auto_attribs=False) class PythonDef(TaskDef[PythonOutputsType]): - _task_type: str = "python" + _task_type = "python" def _run(self, task: "Task[PythonDef]") -> None: # Prepare the inputs to the function @@ -708,7 +706,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: @attrs.define(kw_only=True, auto_attribs=False) class WorkflowDef(TaskDef[WorkflowOutputsType]): - _task_type: str = "workflow" + _task_type = "workflow" RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("construct",) @@ -900,7 +898,7 @@ def _resolve_value( @attrs.define(kw_only=True, auto_attribs=False) class ShellDef(TaskDef[ShellOutputsType]): - _task_type: str = "shell" + _task_type = "shell" BASE_NAMES = ["additional_args"] @@ -953,7 +951,7 @@ def _command_args( inputs.update(input_updates) inputs.update(modified_inputs) pos_args = [] # list for (position, command arg) - self._positions_provided = [] + positions_provided = [] for field in list_fields(self): name = field.name value = inputs[name] @@ -970,6 +968,7 @@ def _command_args( inputs=inputs, root=root, output_dir=output_dir, + positions_provided=positions_provided, ) if pos_val: pos_args.append(pos_val) @@ -1005,6 +1004,7 @@ def _command_pos_args( value: ty.Any, inputs: dict[str, ty.Any], output_dir: Path, + positions_provided: list[str], root: Path | None = None, ) -> tuple[int, ty.Any]: """ @@ -1022,12 +1022,12 @@ def _command_pos_args( f"position should be an integer, but {field.position} given" ) # checking if the position is not already used - if field.position in self._positions_provided: + if field.position in positions_provided: raise Exception( f"{field.name} can't have provided position, {field.position} is already used" ) - self._positions_provided.append(field.position) + positions_provided.append(field.position) if value and isinstance(value, str): if root: # values from templates diff --git a/pydra/engine/tests/test_functions.py 
b/pydra/engine/tests/test_functions.py index 6008aa4a29..ab9ee9cd43 100644 --- a/pydra/engine/tests/test_functions.py +++ b/pydra/engine/tests/test_functions.py @@ -31,7 +31,7 @@ class Canonical(PythonDef["Canonical.Outputs"]): a: ty.Any class Outputs(PythonOutputs): - out: ty.Any + out: int @staticmethod def function(a: int) -> int: @@ -238,14 +238,3 @@ def Square(in_val): outputs = Square(in_val=2.0)() assert outputs.squared == 4.0 assert outputs.cubed == 8.0 - - -def test_return_halfannotated_task_multiple_output(): - - @python.define(inputs={"in_val": float}, outputs=(float, float)) - def Square(in_val): - return in_val**2, in_val**3 - - outputs = Square(in_val=2.0)() - assert outputs.out1 == 4.0 - assert outputs.out2 == 8.0 diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 088ea22931..07b4b18909 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -20,7 +20,7 @@ # from ..helpers import make_klass from .utils import Foo -from pydra.design import python +from pydra.design import python, workflow import pytest @@ -346,8 +346,8 @@ def test_input_file_hash_5(tmp_path): def test_lazy_field_cast(): task = Foo(a="a", b=1, c=2.0, name="foo") - assert task.lzout.y.type == int - assert task.lzout.y.cast(float).type == float + assert task.lzout.y._type is int + assert workflow.cast(task.lzout.y, float)._type is float def test_lazy_field_multi_same_split(): diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 41e753b2f9..a7386e1791 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -329,7 +329,7 @@ def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: if attrs.has(type(obj)): # Drop any attributes that aren't used in comparisons by default dct = attrs.asdict(obj, recurse=False, filter=lambda a, _: bool(a.eq)) - elif hasattr(obj, "__slots__"): + elif hasattr(obj, "__slots__") and obj.__slots__ is not None: dct = {attr: getattr(obj, attr) for attr in obj.__slots__} else: try: @@ -481,14 +481,14 @@ def type_location(tp: type) -> bytes: if isinstance( arg, list ): # sometimes (e.g. 
Callable) the args of a type is a list + yield b"list:(" yield from (b for t in arg for b in bytes_repr_type(t, cache)) + yield b")" else: yield from bytes_repr_type(arg, cache) yield b")" else: - if in_stdlib(klass): - yield type_location(klass) - elif issubclass(klass, FileSet): + if inspect.isclass(klass) and issubclass(klass, FileSet): try: yield b"mime-like:(" + klass.mime_like.encode() + b")" except fileformats.core.exceptions.FormatDefinitionError: @@ -497,6 +497,12 @@ def type_location(tp: type) -> bytes: yield b"fields:(" yield from bytes_repr_sequence_contents(fields, cache) yield b")" + if hasattr(klass, "Outputs"): + yield b",outputs:(" + yield from bytes_repr_type(klass.Outputs, cache) + yield b")" + elif in_stdlib(klass): + yield type_location(klass) else: try: dct = { @@ -505,7 +511,7 @@ def type_location(tp: type) -> bytes: except AttributeError: yield type_location(klass) else: - yield b"dict:(" + yield b"__dict__:(" yield from bytes_repr_mapping_contents(dct, cache) yield b")" # Include annotations diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index fc0ca4ca30..df3ae280fa 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -2,6 +2,7 @@ import re import ast import inspect +import types import sys import platformdirs import builtins @@ -147,12 +148,16 @@ def get_builtin_type_names(): return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) -def in_stdlib(obj): +def in_stdlib(obj: types.FunctionType | type) -> bool: """Check if a type is in the standard library.""" module = inspect.getmodule(obj) if module is None: return False - return module.__name__ in STDLIB_MODULES or module.__name__.startswith("builtins") + if module.__name__.startswith("builtins"): + return True + if module.__name__ == "types" and obj.__name__ not in dir(types): + return False + return module.__name__.split(".")[-1] in STDLIB_MODULES def _stdlib_modules() -> frozenset[str]: diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py index f5171f0ec8..abc4f2a444 100644 --- a/pydra/utils/tests/test_hash.py +++ b/pydra/utils/tests/test_hash.py @@ -75,7 +75,7 @@ def test_bytes_repr_builtins(): (1, "6dc1db8d4dcdd8def573476cbb90cce0"), (12345678901234567890, "2b5ba668c1e8ea4902361b8d81e53074"), (1.0, "29492927b2e505840235e15a5be9f79a"), - ({"b": "c", "a": 0}, "2405cd36f4e4b6318c033f32db289f7d"), + ({"b": "c", "a": 0}, "04e5c65ec2269775d3b9ccecaf10da38"), ([1, 2, 3], "2f8902ff90f63d517bd6f6e6111e15b8"), ((1, 2, 3), "054a7b31c29e7875a6f83ff1dcb4841b"), ], @@ -142,7 +142,7 @@ def __init__(self, x): self.x = x obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_slots_obj(): @@ -153,7 +153,7 @@ def __init__(self, x): self.x = x obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_attrs_slots(): @@ -162,7 +162,7 @@ class MyClass: x: int obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_attrs_no_slots(): @@ -171,7 +171,7 @@ class MyClass: x: int obj_repr = join_bytes_repr(MyClass(1)) - assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + assert re.match(rb".*\.MyClass:{str:1:x=.{16},}", obj_repr) def test_bytes_repr_type1(): @@ -195,22 +195,30 @@ class 
MyClass(ty.Generic[T]): def method(self, f: float) -> float: return f + 1 - assert join_bytes_repr(MyClass[int]) == ( - rb"type:(origin:(type:(dict:(),annotations:(),mro:(type:(typing.Generic))))," - rb"args:(type:(builtins.int)))" + obj_repr = join_bytes_repr(MyClass[int]) + assert re.match( + ( + rb"type:\(origin:\(type:\(__dict__:\(str:6:method=.{16},\),annotations:\(str:1:a=.{16}," + rb"str:1:b=.{16},\),mro:\(type:\(typing.Generic\)\)\)\),args:\(type:\(builtins.int\)\)\)" + ), + obj_repr, ) def test_bytes_special_form1(): obj_repr = join_bytes_repr(ty.Union[int, float]) - assert obj_repr == b"type:(typing.Union[type:(builtins.int)type:(builtins.float)])" + assert obj_repr == ( + b"type:(origin:(type:(typing.Union)),args:(type:(builtins.int)" + b"type:(builtins.float)))" + ) @pytest.mark.skipif(condition=sys.version_info < (3, 10), reason="requires python3.10") def test_bytes_special_form1a(): obj_repr = join_bytes_repr(int | float) - assert ( - obj_repr == b"type:(types.UnionType[type:(builtins.int)type:(builtins.float)])" + assert obj_repr == ( + b"type:(origin:(type:(types.UnionType)),args:(type:(builtins.int)" + b"type:(builtins.float)))" ) @@ -221,30 +229,34 @@ def test_bytes_special_form2(): def test_bytes_special_form3(): obj_repr = join_bytes_repr(ty.Optional[Path]) - assert ( - obj_repr == b"type:(typing.Union[type:(pathlib.Path)type:(builtins.NoneType)])" + assert obj_repr == ( + b"type:(origin:(type:(typing.Union)),args:(type:(pathlib.Path)" + b"type:(builtins.NoneType)))" ) @pytest.mark.skipif(condition=sys.version_info < (3, 10), reason="requires python3.10") def test_bytes_special_form3a(): obj_repr = join_bytes_repr(Path | None) - assert ( - obj_repr - == b"type:(types.UnionType[type:(pathlib.Path)type:(builtins.NoneType)])" + assert obj_repr == ( + b"type:(origin:(type:(types.UnionType)),args:(type:(pathlib.Path)" + b"type:(builtins.NoneType)))" ) def test_bytes_special_form4(): obj_repr = join_bytes_repr(ty.Type[Path]) - assert obj_repr == b"type:(builtins.type[type:(pathlib.Path)])" + assert ( + obj_repr == b"type:(origin:(type:(builtins.type)),args:(type:(pathlib.Path)))" + ) def test_bytes_special_form5(): obj_repr = join_bytes_repr(ty.Callable[[Path, int], ty.Tuple[float, str]]) assert obj_repr == ( - b"type:(collections.abc.Callable[[type:(pathlib.Path)type:(builtins.int)]" - b"type:(builtins.tuple[type:(builtins.float)type:(builtins.str)])])" + b"type:(origin:(type:(collections.abc.Callable)),args:(list:(type:(pathlib.Path)" + b"type:(builtins.int))type:(origin:(type:(builtins.tuple))," + b"args:(type:(builtins.float)type:(builtins.str)))))" ) diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index 06747f9bba..821d5162c2 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -746,7 +746,7 @@ def Workflow(in_file: MyFormatX) -> MyFormatX: assert exc_info_matches(exc_info, "Cannot coerce") inner = workflow.add( # Generic task - OtherSpecificTask(in_file=entry.out.cast(MyOtherFormatX)) + OtherSpecificTask(in_file=workflow.cast(entry.out, MyOtherFormatX)) ) with pytest.raises(TypeError) as exc_info: @@ -756,7 +756,7 @@ def Workflow(in_file: MyFormatX) -> MyFormatX: assert exc_info_matches(exc_info, "Cannot coerce") exit = workflow.add( - SpecificTask(in_file=inner.out.cast(MyFormatX)), name="exit" + SpecificTask(in_file=workflow.cast(inner.out, MyFormatX)), name="exit" ) return exit.out From 812d7b45b14b9ec6e2bf1deef822439ccab98672 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 18:32:22 
+1100 Subject: [PATCH 179/342] added nboutput clear to pre-commit --- .pre-commit-config.yaml | 47 +++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f36105398e..2ea004790e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,24 +1,29 @@ # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files -- repo: https://github.com/psf/black - rev: 24.4.2 - hooks: - - id: black -- repo: https://github.com/codespell-project/codespell - rev: v2.3.0 - hooks: - - id: codespell - additional_dependencies: - - tomli -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - repo: https://github.com/psf/black + rev: 24.4.2 + hooks: + - id: black + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: + - tomli + - repo: https://github.com/PyCQA/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + - repo: https://github.com/kynan/nbstripout + rev: 0.5.0 + hooks: + - id: nbstripout + files: \.(ipynb)$ From 5c9986cc91885391662846f535d7626e0c80aebb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Feb 2025 07:33:18 +0000 Subject: [PATCH 180/342] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../source/howto/create-task-package.ipynb | 46 +- .../source/tutorial/1-getting-started.ipynb | 85 +- .../tutorial/2-advanced-execution.ipynb | 893 +++++++++--------- .../source/tutorial/3-troubleshooting.ipynb | 18 +- new-docs/source/tutorial/4-python.ipynb | 575 +++++------ new-docs/source/tutorial/5-shell.ipynb | 876 ++++++++--------- new-docs/source/tutorial/6-workflow.ipynb | 50 +- .../source/tutorial/7-canonical-form.ipynb | 492 +++++----- 8 files changed, 1360 insertions(+), 1675 deletions(-) diff --git a/new-docs/source/howto/create-task-package.ipynb b/new-docs/source/howto/create-task-package.ipynb index 1b491cbc37..6b454fbae2 100644 --- a/new-docs/source/howto/create-task-package.ipynb +++ b/new-docs/source/howto/create-task-package.ipynb @@ -1,25 +1,25 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create a task package\n", - "\n", - "Work in progress..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a task package\n", + "\n", + "Work in progress..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index a7a1b1ca22..9bb05264f6 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -49,7 +49,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -108,7 +108,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -146,26 +146,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" - ] - } - ], + "outputs": [], "source": [ "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", @@ -197,26 +180,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - 
"/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" - ] - } - ], + "outputs": [], "source": [ "\n", "\n", @@ -263,26 +229,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n", - "/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif\n" - ] - } - ], + "outputs": [], "source": [ "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", @@ -324,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -341,17 +290,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SplitOutputs(out_file=[ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), 
ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif')], return_code=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], stderr=['\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing 
\"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpz3lekmr7/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n'], stdout=['', '', '', '', '', '', '', '', '', ''])\n" - ] - } - ], + "outputs": [], "source": [ "from pydra.utils import default_run_cache_dir\n", "\n", diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 62d8fd1e49..3fe3dbd2c7 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -1,474 +1,423 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Advanced execution\n", - "\n", - "One of the key design features of Pydra is the separation between the parameterisation of\n", - "the task to be executed, and the parameresiation of where and how the task should be\n", - "executed (e.g. on the cloud, on a HPC cluster, ...). This tutorial steps you through\n", - "some of the available options for executing a task.\n", - "\n", - "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import nest_asyncio\n", - "nest_asyncio.apply()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Submitter\n", - "\n", - "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import python\n", - "\n", - "@python.define\n", - "def TenToThePower(p: int) -> int:\n", - " return 10 ** p" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "A newer version (0.25) of nipype/pydra is available. 
You are using 0.25.dev144+g6a590e9d.d20250124\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result(output_dir=PosixPath('/Users/tclose/Library/Caches/pydra/0.24.dev36+g0deadc43/run-cache/TenToThePower_72982a38b5a17142cb2186803fe6b238'), outputs=TenToThePowerOutputs(out=1000), runtime=None, errored=False)\n" - ] - } - ], - "source": [ - "from pydra.engine.submitter import Submitter\n", - "\n", - "ten_to_the_power = TenToThePower(p=3)\n", - "\n", - "with Submitter() as submitter:\n", - " result = submitter(ten_to_the_power)\n", - "\n", - "print(result)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `Result` object contains\n", - "\n", - "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", - "* `runtime`: information about the peak memory and CPU usage\n", - "* `errored`: the error status of the task\n", - "* `task`: the task object that generated the results\n", - "* `output_dir`: the output directory the results are stored in" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workers\n", - "\n", - "Pydra supports several workers with which to execute tasks\n", - "\n", - "- `debug` (default)\n", - "- `cf`\n", - "- `slurm`\n", - "- `sge`\n", - "- `psij`\n", - "- `dask` (experimental)\n", - "\n", - "By default, the *debug* worker is used, which runs tasks serially in a single process\n", - "without use of the `asyncio` module. This makes it easier to debug errors in workflows\n", - "and python tasks, however, when using in Pydra in production you will typically want to\n", - "parallelise the execution for efficiency.\n", - "\n", - "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", - "option because it is able to spread the tasks to be run over multiple processes and\n", - "maximise CPU usage.\n", - "\n", - "If you have access to a high-performance cluster (HPC) then\n", - "the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", - "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", - "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", - "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", - "range of execution backends to choose from.\n", - "\n", - "To specify a worker, the abbreviation can be passed either as a string or using the\n", - "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", - "arguments to the execution call. For example, if we wanted to run five tasks using the\n", - "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", - "call." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10^5 = 100000\n" - ] - } - ], - "source": [ - "from pydra.design import python\n", - "\n", - "if __name__ == \"__main__\":\n", - "\n", - " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", - "\n", - " # Run the 5 tasks in parallel split across 3 processes\n", - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", - "\n", - " p1, p2, p3, p4, p5 = outputs.out\n", - "\n", - " print(f\"10^5 = {p5}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "10^10 = 10000000000\n" - ] - } - ], - "source": [ - "from pydra.engine.workers import ConcurrentFuturesWorker\n", - "\n", - "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", - "\n", - "# Run the 5 tasks in parallel split across 3 processes\n", - "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", - "\n", - "p6, p7, p8, p9, p10 = outputs.out\n", - "\n", - "print(f\"10^10 = {p10}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Reusing previously generated results\n", - "\n", - "Pydra caches all task results in the runtime cache (see [File-system locations](./1-getting-started.html##File-system-locations))\n", - "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", - "go through some of the practicalities of this caching and hashing (see\n", - "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", - "to consider)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Result(output_dir=PosixPath('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/Split_23f11b08e0449f5c5ee0a2756aeee2ea'), outputs=SplitOutputs(out_file=[ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif'), ImageFormat('/private/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/cache/MrGrid_9ecfc891341bf304360011011e2541ab/out_file.mif')], return_code=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], stderr=['\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 
25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing 
\"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] 
reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] 
reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n', '\\x1b[?7l\\rmrgrid: [ 18%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 25%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 30%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 38%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 43%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 47%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 52%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [ 55%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\x1b[?7l\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"...\\x1b[0K\\x1b[?7h\\rmrgrid: [100%] reslicing \"/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpajvhv2av/nifti/o7hRfp9mNCCElZfobqz3xhq9.nii\"\\x1b[0K\\n'], stdout=['', '', '', '', '', '', '', '', '', '']), runtime=None, errored=False)\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "import tempfile\n", - "from fileformats.medimage import Nifti1\n", - "from pydra.engine.submitter import Submitter\n", - "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", - "\n", - "# Make 
directory filled with nifti files\n", - "test_dir = Path(tempfile.mkdtemp())\n", - "nifti_dir = test_dir / \"nifti\"\n", - "nifti_dir.mkdir()\n", - "for i in range(10):\n", - " Nifti1.sample(nifti_dir, seed=i)\n", - "\n", - "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", - "# by splitting the \"input\" input field over all files in the directory\n", - "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", - " in_file=nifti_dir.iterdir()\n", - ")\n", - "\n", - "# Run the task to resample all NIfTI files\n", - "outputs = mrgrid()\n", - "\n", - "# Create a new custom directory\n", - "cache_dir = test_dir / \"cache\"\n", - "cache_dir.mkdir()\n", - "\n", - "submitter = Submitter(cache_dir=cache_dir)\n", - "\n", - "# Run the task to resample all NIfTI files with different voxel sizes\n", - "with submitter:\n", - " result1 = submitter(mrgrid)\n", - "\n", - "print(result1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we attempt to run the same task with the same parameterisation the cache directory\n", - "will point to the same location and the results will be reused" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import copy\n", - "\n", - "VOX_SIZES = [\n", - " (0.5, 0.5, 0.5),\n", - " (0.25, 0.25, 0.25),\n", - " (0.1, 0.1, 0.1),\n", - " (0.35, 0.35, 0.35),\n", - " (0.1, 0.1, 0.1),\n", - " (0.5, 0.5, 0.5),\n", - " (0.25, 0.25, 0.25),\n", - " (0.2, 0.2, 0.2),\n", - " (0.35, 0.35, 0.35),\n", - " (0.1, 0.1, 0.1),\n", - " ]\n", - "\n", - "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", - " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", - " voxel=VOX_SIZES,\n", - ")\n", - "\n", - "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", - "\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result1 = submitter(mrgrid_varying_vox)\n", - "\n", - "\n", - "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", - " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", - " voxel=copy(VOX_SIZES),\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result2 = submitter(mrgrid_varying_vox2)\n", - "\n", - "# Check that the output directory is the same for both runs\n", - "assert result2.output_dir == result1.output_dir\n", - "\n", - "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", - "mrgrid_varying_vox2.voxel[2] = [0.25]\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox2)\n", - "\n", - "# The output directory will be different as the inputs are now different\n", - "assert result3.output_dir != result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that for file objects, the contents of the files are used to calculate the hash\n", - "not their paths. Therefore, when inputting large files there might be some additional\n", - "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", - "shouldn't need to be recalculated unless they are modified). However, this makes the\n", - "hashes invariant to file-system movement. For example, changing the name of one of the\n", - "files in the nifti directory won't invalidate the hash." 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "ename": "AssertionError", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[8], line 34\u001b[0m\n\u001b[1;32m 31\u001b[0m result4 \u001b[38;5;241m=\u001b[39m submitter(mrgrid_varying_vox4)\n\u001b[1;32m 33\u001b[0m \u001b[38;5;66;03m# The cache directory for the new run is different \u001b[39;00m\n\u001b[0;32m---> 34\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m result4\u001b[38;5;241m.\u001b[39moutput_dir \u001b[38;5;241m!=\u001b[39m result1\u001b[38;5;241m.\u001b[39moutput_dir\n", - "\u001b[0;31mAssertionError\u001b[0m: " - ] - } - ], - "source": [ - "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "new_name = first_file.with_name(\"first.nii\")\n", - "first_file.rename(new_name)\n", - "\n", - "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", - " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", - " voxel=VOX_SIZES,\n", - ")\n", - "\n", - "# Result from previous run is reused as the task and inputs are identical\n", - "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox3)\n", - "\n", - "assert result3.output_dir == result1.output_dir\n", - "\n", - "# Replace the first NIfTI file with a new file\n", - "new_name.unlink()\n", - "Nifti1.sample(nifti_dir, seed=100)\n", - "\n", - "# Update the in_file input field to include the new file\n", - "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", - " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", - " voxel=VOX_SIZES,\n", - ")\n", - "\n", - "# The results from the previous runs are ignored as the files have changed\n", - "with submitter:\n", - " result4 = submitter(mrgrid_varying_vox4)\n", - "\n", - "# The cache directory for the new run is different \n", - "assert result4.output_dir != result1.output_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environments\n", - "\n", - "For shell tasks, it is possible to specify that the command runs within a specific\n", - "software environment, such as those provided by software containers (e.g. 
Docker or Apptainer).\n", - "This is down by providing the environment to the submitter/execution call," - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import tempfile\n", - "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", - "from pydra.engine.environments import Docker\n", - "\n", - "test_dir = tempfile.mkdtemp()\n", - "\n", - "nifti_file = Nifti1.sample(test_dir, seed=0)\n", - "\n", - "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", - "# by splitting the \"input\" input field over all files in the directory\n", - "mrgrid = MrGrid(in_file=nifti_file, operation=\"regrid\", voxel=(0.5,0.5,0.5))\n", - "\n", - "# Run the task to resample all NIfTI files\n", - "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", - "\n", - "# Print the locations of the output files\n", - "print(\"\\n\".join(str(p) for p in outputs.out_file))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of course for this to work Docker needs to work and be configured for\n", - "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", - "See [Containers and Environments](../explanation/environments.rst) for more details on\n", - "how to utilise containers and add support for other software environments." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Provenance and auditing\n", - "\n", - "Work in progress..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Advanced execution\n", + "\n", + "One of the key design features of Pydra is the separation between the parameterisation of\n", + "the task to be executed, and the parameterisation of where and how the task should be\n", + "executed (e.g. on the cloud, on a HPC cluster, ...). 
This tutorial steps you through\n", + "some of the available options for executing a task.\n", + "\n", + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submitter\n", + "\n", + "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "@python.define\n", + "def TenToThePower(p: int) -> int:\n", + "    return 10 ** p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.submitter import Submitter\n", + "\n", + "ten_to_the_power = TenToThePower(p=3)\n", + "\n", + "with Submitter() as submitter:\n", + "    result = submitter(ten_to_the_power)\n", + "\n", + "print(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Result` object contains\n", + "\n", + "* `output`: the outputs of the task (if there is only one output it is called `out` by default)\n", + "* `runtime`: information about the peak memory and CPU usage\n", + "* `errored`: the error status of the task\n", + "* `task`: the task object that generated the results\n", + "* `output_dir`: the output directory the results are stored in" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workers\n", + "\n", + "Pydra supports several workers with which to execute tasks\n", + "\n", + "- `debug` (default)\n", + "- `cf`\n", + "- `slurm`\n", + "- `sge`\n", + "- `psij`\n", + "- `dask` (experimental)\n", + "\n", + "By default, the *debug* worker is used, which runs tasks serially in a single process\n", + "without use of the `asyncio` module. This makes it easier to debug errors in workflows\n", + "and python tasks, however, when using Pydra in production you will typically want to\n", + "parallelise the execution for efficiency.\n", + "\n", + "If running on a local workstation, then the `cf` (*ConcurrentFutures*) worker is a good\n", + "option because it is able to spread the tasks to be run over multiple processes and\n", + "maximise CPU usage.\n", + "\n", + "If you have access to a high-performance cluster (HPC) then\n", + "the [SLURM](https://slurm.schedmd.com/documentation.html) and\n", + "[SGE](https://www.metagenomics.wiki/tools/hpc-sge) and [PSI/J](https://exaworks.org/psij)\n", + "workers can be used to submit each workflow node as separate jobs to the HPC scheduler.\n", + "There is also an experimental [Dask](https://www.dask.org/) worker, which provides a\n", + "range of execution backends to choose from.\n", + "\n", + "To specify a worker, the abbreviation can be passed either as a string or using the\n", + "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", + "arguments to the execution call. For example, if we wanted to run five tasks using the\n", + "ConcurrentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", + "call." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "if __name__ == \"__main__\":\n", + "\n", + " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", + "\n", + " # Run the 5 tasks in parallel split across 3 processes\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + "\n", + " p1, p2, p3, p4, p5 = outputs.out\n", + "\n", + " print(f\"10^5 = {p5}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, the worker object can be initialised in the calling code and passed directly to the execution call" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.workers import ConcurrentFuturesWorker\n", + "\n", + "ten_to_the_power = TenToThePower().split(p=[6, 7, 8, 9, 10])\n", + "\n", + "# Run the 5 tasks in parallel split across 3 processes\n", + "outputs = ten_to_the_power(worker=ConcurrentFuturesWorker(n_procs=3))\n", + "\n", + "p6, p7, p8, p9, p10 = outputs.out\n", + "\n", + "print(f\"10^10 = {p10}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reusing previously generated results\n", + "\n", + "Pydra caches all task results in the runtime cache (see [File-system locations](./1-getting-started.html##File-system-locations))\n", + "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", + "go through some of the practicalities of this caching and hashing (see\n", + "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", + "to consider)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import tempfile\n", + "from fileformats.medimage import Nifti1\n", + "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "\n", + "# Make directory filled with nifti files\n", + "test_dir = Path(tempfile.mkdtemp())\n", + "nifti_dir = test_dir / \"nifti\"\n", + "nifti_dir.mkdir()\n", + "for i in range(10):\n", + " Nifti1.sample(nifti_dir, seed=i)\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", + " in_file=nifti_dir.iterdir()\n", + ")\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid()\n", + "\n", + "# Create a new custom directory\n", + "cache_dir = test_dir / \"cache\"\n", + "cache_dir.mkdir()\n", + "\n", + "submitter = Submitter(cache_dir=cache_dir)\n", + "\n", + "# Run the task to resample all NIfTI files with different voxel sizes\n", + "with submitter:\n", + " result1 = submitter(mrgrid)\n", + "\n", + "print(result1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from copy import copy\n", + "\n", + "VOX_SIZES = [\n", + " (0.5, 0.5, 0.5),\n", + " (0.25, 0.25, 0.25),\n", + " (0.1, 0.1, 0.1),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " (0.5, 0.5, 0.5),\n", + " 
(0.25, 0.25, 0.25),\n", + " (0.2, 0.2, 0.2),\n", + " (0.35, 0.35, 0.35),\n", + " (0.1, 0.1, 0.1),\n", + " ]\n", + "\n", + "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", + "\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result1 = submitter(mrgrid_varying_vox)\n", + "\n", + "\n", + "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=copy(VOX_SIZES),\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result2 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# Check that the output directory is the same for both runs\n", + "assert result2.output_dir == result1.output_dir\n", + "\n", + "# Change the voxel sizes to resample the NIfTI files to for one of the files\n", + "mrgrid_varying_vox2.voxel[2] = [0.25]\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox2)\n", + "\n", + "# The output directory will be different as the inputs are now different\n", + "assert result3.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that for file objects, the contents of the files are used to calculate the hash\n", + "not their paths. Therefore, when inputting large files there might be some additional\n", + "overhead on the first run (the file hashes themselves are cached by path and mtime so\n", + "shouldn't need to be recalculated unless they are modified). However, this makes the\n", + "hashes invariant to file-system movement. For example, changing the name of one of the\n", + "files in the nifti directory won't invalidate the hash." 
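+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a rough standalone illustration of this principle (a sketch using only the standard\n",
+    "library, not Pydra's actual hashing machinery), a content-based hash ignores the file's\n",
+    "name and location, so a copy of a file saved under a different name yields the same digest:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import hashlib\n",
+    "import shutil\n",
+    "\n",
+    "\n",
+    "def content_hash(path: Path) -> str:\n",
+    "    \"\"\"Hash a file by its contents only, ignoring its name and location\"\"\"\n",
+    "    return hashlib.blake2b(path.read_bytes()).hexdigest()\n",
+    "\n",
+    "\n",
+    "a_nifti = next(nifti_dir.iterdir())\n",
+    "\n",
+    "# Copy the file under a different name *outside* the nifti directory so the\n",
+    "# splits over nifti_dir.iterdir() below are unaffected\n",
+    "copied = Path(shutil.copy(a_nifti, test_dir / \"copied-elsewhere.nii\"))\n",
+    "\n",
+    "# Same contents, different path -> same content-based hash\n",
+    "assert content_hash(a_nifti) == content_hash(copied)"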
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Rename a NIfTI file within the test directory\n", + "first_file = next(nifti_dir.iterdir())\n", + "new_name = first_file.with_name(\"first.nii\")\n", + "first_file.rename(new_name)\n", + "\n", + "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# Result from previous run is reused as the task and inputs are identical\n", + "with submitter:\n", + " result3 = submitter(mrgrid_varying_vox3)\n", + "\n", + "assert result3.output_dir == result1.output_dir\n", + "\n", + "# Replace the first NIfTI file with a new file\n", + "new_name.unlink()\n", + "Nifti1.sample(nifti_dir, seed=100)\n", + "\n", + "# Update the in_file input field to include the new file\n", + "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", + " (\"in_file\", \"voxel\"),\n", + " in_file=nifti_dir.iterdir(),\n", + " voxel=VOX_SIZES,\n", + ")\n", + "\n", + "# The results from the previous runs are ignored as the files have changed\n", + "with submitter:\n", + " result4 = submitter(mrgrid_varying_vox4)\n", + "\n", + "# The cache directory for the new run is different \n", + "assert result4.output_dir != result1.output_dir" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Environments\n", + "\n", + "For shell tasks, it is possible to specify that the command runs within a specific\n", + "software environment, such as those provided by software containers (e.g. Docker or Apptainer).\n", + "This is down by providing the environment to the submitter/execution call," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tempfile\n", + "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", + "from pydra.engine.environments import Docker\n", + "\n", + "test_dir = tempfile.mkdtemp()\n", + "\n", + "nifti_file = Nifti1.sample(test_dir, seed=0)\n", + "\n", + "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", + "# by splitting the \"input\" input field over all files in the directory\n", + "mrgrid = MrGrid(in_file=nifti_file, operation=\"regrid\", voxel=(0.5,0.5,0.5))\n", + "\n", + "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", + "\n", + "# Print the locations of the output files\n", + "print(\"\\n\".join(str(p) for p in outputs.out_file))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course for this to work Docker needs to work and be configured for\n", + "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", + "See [Containers and Environments](../explanation/environments.rst) for more details on\n", + "how to utilise containers and add support for other software environments." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provenance and auditing\n", + "\n", + "Work in progress..." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index aa0950ea3e..17b4307952 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -30,7 +30,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -187,21 +187,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'Submitter' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpydra\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtasks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtesting\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SafeDivisionWorkflow\n\u001b[1;32m 3\u001b[0m wf \u001b[38;5;241m=\u001b[39m SafeDivisionWorkflow(a\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m, b\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m)\u001b[38;5;241m.\u001b[39msplit(denominator\u001b[38;5;241m=\u001b[39m[\u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m2\u001b[39m ,\u001b[38;5;241m0\u001b[39m])\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[43mSubmitter\u001b[49m(worker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcf\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m sub:\n\u001b[1;32m 6\u001b[0m result \u001b[38;5;241m=\u001b[39m sub(wf)\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWorkflow completed successfully, results saved in: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresult\u001b[38;5;241m.\u001b[39moutput_dir\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'Submitter' is not defined" - ] - } - ], + "outputs": [], "source": [ "from pydra.tasks.testing import SafeDivisionWorkflow\n", "\n", diff --git a/new-docs/source/tutorial/4-python.ipynb b/new-docs/source/tutorial/4-python.ipynb index c8cf726689..a09a3d630c 100644 --- a/new-docs/source/tutorial/4-python.ipynb +++ b/new-docs/source/tutorial/4-python.ipynb @@ -1,319 +1,260 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Python-tasks\n", - "\n", - "Python task definitions are Python functions that are parameterised in a separate step before\n", - "they are executed or added to a workflow.\n", - "\n", - "## Define decorator\n", - "\n", - "The simplest way to define a Python task is to decorate a function with `pydra.design.python.define`" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - 
"outputs": [], - "source": [ - "from pydra.design import python\n", - "\n", - "# Note that we use CamelCase as the return of the is a class\n", - "@python.define\n", - "def MyFirstTaskDef(a, b):\n", - " \"\"\"Sample function for testing\"\"\"\n", - " return a + b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The resulting task-definition class can be then parameterized (instantiated), and\n", - "executed" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "A newer version (0.25) of nipype/pydra is available. You are using 0.25.dev103+g1a6b067c.d20241228\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3.0\n" - ] - } - ], - "source": [ - "# Instantiate the task, setting all parameters\n", - "my_first_task = MyFirstTaskDef(a=1, b=2.0)\n", - "\n", - "# Execute the task\n", - "outputs = my_first_task()\n", - "\n", - "print(outputs.out)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By default, the name of the output field for a function with only one output is `out`. To\n", - "name this something else, or in the case where there are multiple output fields, the `outputs`\n", - "argument can be provided to `python.define`\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NamedOutputTaskOutputs(c=3, d=1)\n" - ] - } - ], - "source": [ - "@python.define(outputs=[\"c\", \"d\"])\n", - "def NamedOutputTaskDef(a, b):\n", - " \"\"\"Sample function for testing\"\"\"\n", - " return a + b, a - b\n", - "\n", - "named_output_task = NamedOutputTaskDef(a=2, b=1)\n", - "\n", - "outputs = named_output_task()\n", - "\n", - "print(outputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The input and output field attributes automatically extracted from the function, explicit\n", - "attributes can be augmented" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(\n", - " inputs={\"a\": python.arg(allowed_values=[1, 2, 3]), \"b\": python.arg(default=10.0)},\n", - " outputs={\n", - " \"c\": python.out(type=float, help=\"the sum of the inputs\"),\n", - " \"d\": python.out(type=float, help=\"the difference of the inputs\"),\n", - " },\n", - ")\n", - "def AugmentedTaskDef(a, b):\n", - " \"\"\"Sample function for testing\"\"\"\n", - " return a + b, a - b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Type annotations\n", - "\n", - "If provided, type annotations are included in the task definition, and are checked at\n", - "the time of parameterisation." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Type error caught: Incorrect type for field in 'a' field of MyTypedTask interface : 1.5 is not of type (and cannot be coerced to it)\n" - ] - } - ], - "source": [ - "from pydra.design import python\n", - "\n", - "# Note that we use CamelCase as the function is translated to a class\n", - "\n", - "@python.define\n", - "def MyTypedTask(a: int, b: float) -> float:\n", - " \"\"\"Sample function for testing\"\"\"\n", - " return a + b\n", - "\n", - "try:\n", - " # 1.5 is not an integer so this should raise a TypeError\n", - " my_typed_task = MyTypedTask(a=1.5, b=2.0)\n", - "except TypeError as e:\n", - " print(f\"Type error caught: {e}\")\n", - "else:\n", - " assert False, \"Expected a TypeError\"\n", - "\n", - "# While 2 is an integer, it can be implicitly coerced to a float\n", - "my_typed_task = MyTypedTask(a=1, b=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "## Docstring parsing\n", - "\n", - "Instead of explicitly providing help strings and output names in `inputs` and `outputs`\n", - "arguments, if the function describes the its inputs and/or outputs in the doc string, \n", - "in either reST, Google or NumpyDoc style, then they will be extracted and included in the\n", - "input or output fields\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'a': arg(name='a', type=, default=EMPTY, help='First input to be inputted', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'b': arg(name='b', type=, default=EMPTY, help='Second input', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False),\n", - " 'function': arg(name='function', type=typing.Callable, default=, help='', requires=[], converter=None, validator=None, allowed_values=(), xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False)}\n", - "{'c': out(name='c', type=, default=EMPTY, help='Sum of a and b', requires=[], converter=None, validator=None),\n", - " 'd': out(name='d', type=, default=EMPTY, help='Product of a and b', requires=[], converter=None, validator=None)}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "\n", - "@python.define\n", - "def DocStrDef(a: int, b: float) -> tuple[float, float]:\n", - " \"\"\"Sample function for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - "\n", - " Returns:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - " return a + b, a * b\n", - "\n", - "pprint(fields_dict(DocStrDef))\n", - "pprint(fields_dict(DocStrDef.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Wrapping external functions\n", - "\n", - "Like all decorators, `python.define` is just a function, so can also be used to convert\n", - "a function that is defined separately into a Python task definition." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[3.5]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "NumpyCorrelate = python.define(np.correlate)\n", - "\n", - "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", - "\n", - "outputs = numpy_correlate()\n", - "\n", - "print(outputs.out)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like with decorated functions, input and output fields can be explicitly augmented via\n", - "the `inputs` and `outputs` arguments" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[3.5]\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "NumpyCorrelate = python.define(np.correlate, outputs=[\"correlation\"])\n", - "\n", - "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", - "\n", - "outputs = numpy_correlate()\n", - "\n", - "print(outputs.correlation)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python-tasks\n", + "\n", + "Python task definitions are Python functions that are parameterised in a separate step before\n", + "they are executed or added to a workflow.\n", + "\n", + "## Define decorator\n", + "\n", + "The simplest way to define a Python task is to decorate a function with `pydra.design.python.define`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "# Note that we use CamelCase as the return of the is a class\n", + "@python.define\n", + "def MyFirstTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The resulting task-definition class can be then parameterized (instantiated), and\n", + "executed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Instantiate the task, setting all parameters\n", + "my_first_task = MyFirstTaskDef(a=1, b=2.0)\n", + "\n", + "# Execute the task\n", + "outputs = my_first_task()\n", + "\n", + "print(outputs.out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By default, the name of the output field for a function with only one output is `out`. 
To\n", + "name this something else, or in the case where there are multiple output fields, the `outputs`\n", + "argument can be provided to `python.define`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(outputs=[\"c\", \"d\"])\n", + "def NamedOutputTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b\n", + "\n", + "named_output_task = NamedOutputTaskDef(a=2, b=1)\n", + "\n", + "outputs = named_output_task()\n", + "\n", + "print(outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The input and output field attributes automatically extracted from the function, explicit\n", + "attributes can be augmented" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@python.define(\n", + " inputs={\"a\": python.arg(allowed_values=[1, 2, 3]), \"b\": python.arg(default=10.0)},\n", + " outputs={\n", + " \"c\": python.out(type=float, help=\"the sum of the inputs\"),\n", + " \"d\": python.out(type=float, help=\"the difference of the inputs\"),\n", + " },\n", + ")\n", + "def AugmentedTaskDef(a, b):\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b, a - b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Type annotations\n", + "\n", + "If provided, type annotations are included in the task definition, and are checked at\n", + "the time of parameterisation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python\n", + "\n", + "# Note that we use CamelCase as the function is translated to a class\n", + "\n", + "@python.define\n", + "def MyTypedTask(a: int, b: float) -> float:\n", + " \"\"\"Sample function for testing\"\"\"\n", + " return a + b\n", + "\n", + "try:\n", + " # 1.5 is not an integer so this should raise a TypeError\n", + " my_typed_task = MyTypedTask(a=1.5, b=2.0)\n", + "except TypeError as e:\n", + " print(f\"Type error caught: {e}\")\n", + "else:\n", + " assert False, \"Expected a TypeError\"\n", + "\n", + "# While 2 is an integer, it can be implicitly coerced to a float\n", + "my_typed_task = MyTypedTask(a=1, b=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Docstring parsing\n", + "\n", + "Instead of explicitly providing help strings and output names in `inputs` and `outputs`\n", + "arguments, if the function describes the its inputs and/or outputs in the doc string, \n", + "in either reST, Google or NumpyDoc style, then they will be extracted and included in the\n", + "input or output fields\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "@python.define\n", + "def DocStrDef(a: int, b: float) -> tuple[float, float]:\n", + " \"\"\"Sample function for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + "\n", + " Returns:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + " return a + b, a * b\n", + "\n", + "pprint(fields_dict(DocStrDef))\n", + "pprint(fields_dict(DocStrDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wrapping external functions\n", + "\n", + "Like all decorators, `python.define` is just a function, so can 
also be used to convert\n", + "a function that is defined separately into a Python task definition." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "NumpyCorrelate = python.define(np.correlate)\n", + "\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", + "\n", + "outputs = numpy_correlate()\n", + "\n", + "print(outputs.out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like with decorated functions, input and output fields can be explicitly augmented via\n", + "the `inputs` and `outputs` arguments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "NumpyCorrelate = python.define(np.correlate, outputs=[\"correlation\"])\n", + "\n", + "numpy_correlate = NumpyCorrelate(a=[1, 2, 3], v=[0, 1, 0.5])\n", + "\n", + "outputs = numpy_correlate()\n", + "\n", + "print(outputs.correlation)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/5-shell.ipynb b/new-docs/source/tutorial/5-shell.ipynb index 0cb40bc080..3d57db0696 100644 --- a/new-docs/source/tutorial/5-shell.ipynb +++ b/new-docs/source/tutorial/5-shell.ipynb @@ -1,493 +1,387 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Shell-tasks" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Command-line templates\n", - "\n", - "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import shell\n", - "\n", - "Cp = shell.define(\"cp \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. 
Outputs are differentiated by the `out|` prefix.\n", - "\n", - "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Command-line to be run: cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt\n", - "Contents of copied file ('/var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/out.txt'): 'Contents to be copied'\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "from tempfile import mkdtemp\n", - "\n", - "# Make a test file to copy\n", - "test_dir = Path(mkdtemp())\n", - "test_file = test_dir / \"in.txt\"\n", - "with open(test_file, \"w\") as f:\n", - " f.write(\"Contents to be copied\")\n", - "\n", - "# Parameterise the task definition\n", - "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", - "\n", - "# Print the cmdline to be run to double check\n", - "print(f\"Command-line to be run: {cp.cmdline}\")\n", - "\n", - "# Run the shell-comand task\n", - "outputs = cp()\n", - "\n", - "print(\n", - " f\"Contents of copied file ('{outputs.destination}'): \"\n", - " f\"'{Path(outputs.destination).read_text()}'\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If paths to output files are not provided in the parameterisation, it will default to the name of the field" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "cp /var/folders/mz/yn83q2fd3s758w1j75d2nnw80000gn/T/tmpoyx19gql/in.txt /Users/tclose/git/workflows/pydra/docs/source/tutorial/destination\n" - ] - } - ], - "source": [ - "cp = Cp(in_file=test_file)\n", - "print(cp.cmdline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Defifying types\n", - "\n", - "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", - "\n", - "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "trim-png /mock/png.png /path/to/output.png\n" - ] - } - ], - "source": [ - "from fileformats.image import Png\n", - "\n", - "TrimPng = shell.define(\"trim-png \")\n", - "\n", - "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", - "\n", - "print(trim_png.cmdline)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Flags and options\n", - "\n", - "Command line flags can also be added to the shell template, either the single or double hyphen form. 
The field template name immediately following the flag will be associate with that flag.\n", - "\n", - "If there is no space between the flag and the field template, then the field is assumed to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", - "\n", - "If a field is optional, the field template should end with a `?`. Tuple fields are specified by comma separated types.\n", - "\n", - "Varargs are specified by the type followed by an ellipsis, e.g. ``" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. ['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=str | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=6, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by 
the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "\n", - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", - "\n", - "pprint(fields_dict(Cp))\n", - "pprint(fields_dict(Cp.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Defaults\n", - "\n", - "Defaults can be specified by appending them to the field template after `=`" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'--int-arg' default: 99\n" - ] - } - ], - "source": [ - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", - "\n", - "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Additional field attributes\n", - "\n", - "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=1, sep=' ', allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'recursive': arg(name='recursive', type=, default=False, help='If source_file designates a directory, cp copies the directory and the entire subtree connected at that point.', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=5, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_dir': outarg(name='out_dir', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_dir', keep_extension=False),\n", - " 'out_file': outarg(name='out_file', type=fileformats.generic.file.File | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template='out_file', keep_extension=False),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the 
command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [ - "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " inputs={\"recursive\": shell.arg(\n", - " help=(\n", - " \"If source_file designates a directory, cp copies the directory and \"\n", - " \"the entire subtree connected at that point.\"\n", - " )\n", - " )},\n", - " outputs={\n", - " \"out_dir\": shell.outarg(position=-2),\n", - " \"out_file\": shell.outarg(position=-1),\n", - " },\n", - " )\n", - "\n", - "\n", - "pprint(fields_dict(Cp))\n", - "pprint(fields_dict(Cp.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Callable outptus\n", - "\n", - "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Size of the output file is: 256\n" - ] - } - ], - "source": [ - "import os\n", - "from pydra.design import shell\n", - "from pathlib import Path\n", - "from fileformats.generic import File\n", - "\n", - "# Arguments to the callable function can be one of \n", - "def get_file_size(out_file: Path) -> int:\n", - " \"\"\"Calculate the file size\"\"\"\n", - " result = os.stat(out_file)\n", - " return result.st_size\n", - "\n", - "\n", - "CpWithSize = shell.define(\n", - " \"cp \",\n", - " outputs={\"out_file_size\": get_file_size},\n", - ")\n", - "\n", - "# Parameterise the task definition\n", - "cp_with_size = CpWithSize(in_file=File.sample())\n", - "\n", - "# Run the command\n", - "outputs = cp_with_size()\n", - "\n", - "\n", - "print(f\"Size of the output file is: {outputs.out_file_size}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The callable can take any combination of the following arguments, which will be passed\n", - "to it when it is called\n", - "\n", - "* field: the `Field` object to be provided a value, useful when writing generic callables\n", - "* output_dir: a `Path` object referencing the working directory the command was run within\n", - "* inputs: a dictionary containing all the resolved inputs to the task\n", - "* stdout: the standard output stream produced by the command\n", - "* stderr: the standard error stream produced by the command\n", - "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'executable': arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='cp', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'in_fs_objects': arg(name='in_fs_objects', type=pydra.utils.typing.MultiInputObj[fileformats.generic.fsobject.FsObject], default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=5, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'int_arg': arg(name='int_arg', type=int | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--int-arg', position=1, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'recursive': arg(name='recursive', type=, default=False, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='-R', position=2, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'text_arg': arg(name='text_arg', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--text-arg', position=3, sep=None, allowed_values=None, container_path=False, formatter=None),\n", - " 'tuple_arg': arg(name='tuple_arg', type=tuple[int, str] | None, default=None, help='', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='--tuple-arg', position=4, sep=None, allowed_values=None, container_path=False, formatter=None)}\n", - "{'out_file': out(name='out_file', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, callable=None),\n", - " 'out_file_size': out(name='out_file_size', type=, default=EMPTY, help='', requires=[], converter=None, validator=None, callable=),\n", - " 'return_code': out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None),\n", - " 'stderr': out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None),\n", - " 'stdout': out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None)}\n" - ] - } - ], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To make workflows that use the interface type-checkable, the canonical form of a shell\n", - "task dataclass should inherit from `shell.Def` parameterized by its nested Outputs class,\n", - "and the `Outputs` nested class should inherit from `shell.Outputs`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.engine.specs import ShellDef, ShellOutputs\n", - "\n", - "@shell.define\n", - "class Cp(ShellDef[\"Cp.Outputs\"]):\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " @shell.outputs\n", - " class Outputs(ShellOutputs):\n", - " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dynamic definitions\n", - "\n", - "In some cases, it is required to generate the definition for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ACommand input fields: [arg(name='in_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-2, sep=None, allowed_values=None, container_path=False, formatter=None), outarg(name='out_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), arg(name='executable', type=typing.Union[str, typing.Sequence[str]], default='a-command', help=\"the first part of the command, can be a string, e.g. 'ls', or a list, e.g. 
['ls', '-l', 'dirname']\", requires=[], converter=None, validator=, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=0, sep=None, allowed_values=None, container_path=False, formatter=None)]\n", - "ACommand input fields: [outarg(name='out_file', type=, default=EMPTY, help='output file', requires=[], converter=None, validator=None, xor=(), copy_mode=, copy_collation=, copy_ext_decomp=, readonly=False, argstr='', position=-1, sep=None, allowed_values=None, container_path=False, formatter=None, path_template=None, keep_extension=False), out(name='out_file_size', type=, default=EMPTY, help='size of the output directory', requires=[], converter=None, validator=None, callable=), out(name='return_code', type=, default=EMPTY, help=\"The process' exit code.\", requires=[], converter=None, validator=None, callable=None), out(name='stdout', type=, default=EMPTY, help='The standard output stream produced by the command.', requires=[], converter=None, validator=None, callable=None), out(name='stderr', type=, default=EMPTY, help='The standard error stream produced by the command.', requires=[], converter=None, validator=None, callable=None)]\n" - ] - } - ], - "source": [ - "from fileformats.generic import File\n", - "from pydra.engine.helpers import list_fields\n", - "\n", - "ACommand = shell.define(\n", - " \"a-command\",\n", - " inputs={\n", - " \"in_file\": shell.arg(type=File, help=\"output file\", argstr=\"\", position=-2)\n", - " },\n", - " outputs={\n", - " \"out_file\": shell.outarg(\n", - " type=File, help=\"output file\", argstr=\"\", position=-1\n", - " ),\n", - " \"out_file_size\": {\n", - " \"type\": int,\n", - " \"help\": \"size of the output directory\",\n", - " \"callable\": get_file_size,\n", - " }\n", - " },\n", - ")\n", - "\n", - "\n", - "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", - "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "wf12", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Shell-tasks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Command-line templates\n", + "\n", + "Shell task specs can be defined using from string templates that resemble the command-line usage examples typically used in in-line help. Therefore, they can be quick and intuitive way to specify a shell task. For example, a simple spec for the copy command `cp` that omits optional flags," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "\n", + "Cp = shell.define(\"cp \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Input and output fields are both specified by placing the name of the field within enclosing `<` and `>`. 
Outputs are differentiated by the `out|` prefix.\n", + "\n", + "This shell task can then be run just as a Python task would be run, first parameterising it, then executing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "\n", + "# Make a test file to copy\n", + "test_dir = Path(mkdtemp())\n", + "test_file = test_dir / \"in.txt\"\n", + "with open(test_file, \"w\") as f:\n", + " f.write(\"Contents to be copied\")\n", + "\n", + "# Parameterise the task definition\n", + "cp = Cp(in_file=test_file, destination=test_dir / \"out.txt\")\n", + "\n", + "# Print the cmdline to be run to double check\n", + "print(f\"Command-line to be run: {cp.cmdline}\")\n", + "\n", + "# Run the shell-comand task\n", + "outputs = cp()\n", + "\n", + "print(\n", + " f\"Contents of copied file ('{outputs.destination}'): \"\n", + " f\"'{Path(outputs.destination).read_text()}'\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If paths to output files are not provided in the parameterisation, it will default to the name of the field" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cp = Cp(in_file=test_file)\n", + "print(cp.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defifying types\n", + "\n", + "By default, shell-command fields are considered to be of `fileformats.generic.FsObject` type. However, more specific file formats or built-in Python types can be specified by appending the type to the field name after a `:`.\n", + "\n", + "File formats are specified by their MIME type or \"MIME-like\" strings (see the [FileFormats docs](https://arcanaframework.github.io/fileformats/mime.html) for details)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.image import Png\n", + "\n", + "TrimPng = shell.define(\"trim-png \")\n", + "\n", + "trim_png = TrimPng(in_image=Png.mock(), out_image=\"/path/to/output.png\")\n", + "\n", + "print(trim_png.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Flags and options\n", + "\n", + "Command line flags can also be added to the shell template, either the single or double hyphen form. The field template name immediately following the flag will be associate with that flag.\n", + "\n", + "If there is no space between the flag and the field template, then the field is assumed to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", + "\n", + "If a field is optional, the field template should end with a `?`. Tuple fields are specified by comma separated types.\n", + "\n", + "Varargs are specified by the type followed by an ellipsis, e.g. 
``" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "\n", + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " )\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Defaults\n", + "\n", + "Defaults can be specified by appending them to the field template after `=`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " )\n", + "\n", + "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Additional field attributes\n", + "\n", + "Additional attributes of the fields in the template can be specified by providing `shell.arg` or `shell.outarg` fields to the `inputs` and `outputs` keyword arguments to the define" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Cp = shell.define(\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " inputs={\"recursive\": shell.arg(\n", + " help=(\n", + " \"If source_file designates a directory, cp copies the directory and \"\n", + " \"the entire subtree connected at that point.\"\n", + " )\n", + " )},\n", + " outputs={\n", + " \"out_dir\": shell.outarg(position=-2),\n", + " \"out_file\": shell.outarg(position=-1),\n", + " },\n", + " )\n", + "\n", + "\n", + "pprint(fields_dict(Cp))\n", + "pprint(fields_dict(Cp.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Callable outptus\n", + "\n", + "In addition to outputs that are specified to the tool on the command line, outputs can be derived from the outputs of the tool by providing a Python function that can take the output directory and inputs as arguments and return the output value. 
Callables can be either specified in the `callable` attribute of the `shell.out` field, or in a dictionary mapping the output name to the callable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pydra.design import shell\n", + "from pathlib import Path\n", + "from fileformats.generic import File\n", + "\n", + "# Arguments to the callable function can be one of \n", + "def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + "\n", + "CpWithSize = shell.define(\n", + " \"cp \",\n", + " outputs={\"out_file_size\": get_file_size},\n", + ")\n", + "\n", + "# Parameterise the task definition\n", + "cp_with_size = CpWithSize(in_file=File.sample())\n", + "\n", + "# Run the command\n", + "outputs = cp_with_size()\n", + "\n", + "\n", + "print(f\"Size of the output file is: {outputs.out_file_size}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The callable can take any combination of the following arguments, which will be passed\n", + "to it when it is called\n", + "\n", + "* field: the `Field` object to be provided a value, useful when writing generic callables\n", + "* output_dir: a `Path` object referencing the working directory the command was run within\n", + "* inputs: a dictionary containing all the resolved inputs to the task\n", + "* stdout: the standard output stream produced by the command\n", + "* stderr: the standard error stream produced by the command\n", + "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make workflows that use the interface type-checkable, the canonical form of a shell\n", + "task dataclass should inherit from `shell.Def` parameterized by its nested Outputs class,\n", + "and the `Outputs` nested class should inherit from `shell.Outputs`." 
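+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The canonical form of the `Cp` task used in the examples above is shown below. Its\n",
+    "annotations use the `MultiInputObj`, `FsObject` and `Directory` types, which are assumed\n",
+    "here to come from `pydra.utils.typing` and `fileformats.generic` respectively, and\n",
+    "therefore need to be imported first:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fileformats.generic import FsObject, Directory\n",
+    "from pydra.utils.typing import MultiInputObj"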
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "\n", + "@shell.define\n", + "class Cp(ShellDef[\"Cp.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " @shell.outputs\n", + " class Outputs(ShellOutputs):\n", + " out_dir: Directory = shell.outarg(path_template=\"{out_dir}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dynamic definitions\n", + "\n", + "In some cases, it is required to generate the definition for a task dynamically, which can be done by just providing the executable to `shell.define` and specifying all inputs and outputs explicitly" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from fileformats.generic import File\n", + "from pydra.engine.helpers import list_fields\n", + "\n", + "ACommand = shell.define(\n", + " \"a-command\",\n", + " inputs={\n", + " \"in_file\": shell.arg(type=File, help=\"output file\", argstr=\"\", position=-2)\n", + " },\n", + " outputs={\n", + " \"out_file\": shell.outarg(\n", + " type=File, help=\"output file\", argstr=\"\", position=-1\n", + " ),\n", + " \"out_file_size\": {\n", + " \"type\": int,\n", + " \"help\": \"size of the output directory\",\n", + " \"callable\": get_file_size,\n", + " }\n", + " },\n", + ")\n", + "\n", + "\n", + "print(f\"ACommand input fields: {list_fields(ACommand)}\")\n", + "print(f\"ACommand input fields: {list_fields(ACommand.Outputs)}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index 4fa0c00d87..7c2dd20637 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -76,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -224,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ @@ -341,7 +341,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -430,37 +430,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "The canonical form of specs task definitions, , must inherit from ", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 56\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output_conversion\u001b[38;5;241m.\u001b[39mout_file\n\u001b[1;32m 54\u001b[0m test_dir \u001b[38;5;241m=\u001b[39m tempfile\u001b[38;5;241m.\u001b[39mmkdtemp()\n\u001b[0;32m---> 56\u001b[0m nifti_file \u001b[38;5;241m=\u001b[39m \u001b[43mNifti1\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 58\u001b[0m wf \u001b[38;5;241m=\u001b[39m ToyMedianThreshold(in_image\u001b[38;5;241m=\u001b[39mnifti_file)\n\u001b[1;32m 60\u001b[0m outputs \u001b[38;5;241m=\u001b[39m wf()\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1049\u001b[0m, in \u001b[0;36mFileSet.sample\u001b[0;34m(cls, dest_dir, seed, stem)\u001b[0m\n\u001b[1;32m 1046\u001b[0m stem \u001b[38;5;241m=\u001b[39m dest_dir\u001b[38;5;241m.\u001b[39mname\n\u001b[1;32m 1047\u001b[0m \u001b[38;5;66;03m# Need to use mock to get an instance in order to use the singledispatch-based\u001b[39;00m\n\u001b[1;32m 1048\u001b[0m \u001b[38;5;66;03m# extra decorator\u001b[39;00m\n\u001b[0;32m-> 1049\u001b[0m fspaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43mSampleFileGenerator\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdest_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdest_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfname_stem\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstem\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1051\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1053\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m(fspaths)\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/fileset.py:1082\u001b[0m, in \u001b[0;36mFileSet.sample_data\u001b[0;34m(cls, generator)\u001b[0m\n\u001b[1;32m 1068\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Converts the `generate_sample_data` method into a class method by mocking up\u001b[39;00m\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;124;03ma class instance and calling the method on it\u001b[39;00m\n\u001b[1;32m 1070\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;124;03m the generated file-system 
paths\u001b[39;00m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m mock: FileSet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mmock()\n\u001b[0;32m-> 1082\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_sample_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgenerator\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/extras.py:38\u001b[0m, in \u001b[0;36mextra..decorated\u001b[0;34m(obj, *args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m extras \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tp \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mreferenced_types(): \u001b[38;5;66;03m# type: ignore[attr-defined]\u001b[39;00m\n\u001b[0;32m---> 38\u001b[0m extras\u001b[38;5;241m.\u001b[39mappend(\u001b[43mimport_extras_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtp\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 39\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m dispatch_method(obj, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", - "File \u001b[0;32m~/git/workflows/fileformats/fileformats/core/utils.py:228\u001b[0m, in \u001b[0;36mimport_extras_module\u001b[0;34m(klass)\u001b[0m\n\u001b[1;32m 226\u001b[0m extras_pypi \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfileformats-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msub_pkg\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;250m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m-\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-extras\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 227\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 228\u001b[0m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mextras_pkg\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mModuleNotFoundError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo module named \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mextras_pkg\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", - "File \u001b[0;32m~/.pyenv/versions/3.12.5/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n", - "File \u001b[0;32m:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n", - "File \u001b[0;32m:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n", - "File \u001b[0;32m:995\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n", - "File \u001b[0;32m:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n", - "File \u001b[0;32m~/git/workflows/fileformats-medimage/extras/fileformats/extras/medimage/__init__.py:3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# flake8: noqa: F401\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_version\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m __version__\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m converters\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m dicom\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m diffusion\n", - "File \u001b[0;32m~/git/workflows/fileformats-medimage/extras/fileformats/extras/medimage/converters.py:188\u001b[0m\n\u001b[1;32m 183\u001b[0m lst\u001b[38;5;241m.\u001b[39mappend(file)\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m lst\n\u001b[1;32m 187\u001b[0m \u001b[38;5;129;43m@converter\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msource_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mMedicalImage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_format\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mAnalyze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout_ext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mAnalyze\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mext\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m--> 188\u001b[0m \u001b[38;5;129;43m@shell\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefine\u001b[49m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;43;01mclass\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43;01mMrConvert\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[1;32m 190\u001b[0m \u001b[38;5;250;43m \u001b[39;49m\u001b[38;5;124;43;03m\"\"\"If used correctly, this program can be a very useful workhorse. In addition to converting images between different formats, it can be used to extract specific studies from a data set, extract a specific region of interest, or flip the images. Some of the possible operations are described in more detail below.\u001b[39;49;00m\n\u001b[1;32m 191\u001b[0m \n\u001b[1;32m 192\u001b[0m \u001b[38;5;124;43;03m Note that for both the -coord and -axes options, indexing starts from 0 rather than 1. E.g. 
-coord 3 <#> selects volumes (the fourth dimension) from the series; -axes 0,1,2 includes only the three spatial axes in the output image.\u001b[39;49;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 291\u001b[0m \u001b[38;5;124;43;03m For more details, see http://www.mrtrix.org/.\u001b[39;49;00m\n\u001b[1;32m 292\u001b[0m \u001b[38;5;124;43;03m \"\"\"\u001b[39;49;00m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutable\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmrconvert\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/design/shell.py:420\u001b[0m, in \u001b[0;36mdefine\u001b[0;34m(wrapped, inputs, outputs, bases, outputs_bases, auto_attribs, name)\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(wrapped, (\u001b[38;5;28mtype\u001b[39m, \u001b[38;5;28mstr\u001b[39m)):\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwrapped must be a class or a string, not \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mwrapped\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mwrapped\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m make\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/design/shell.py:320\u001b[0m, in \u001b[0;36mdefine..make\u001b[0;34m(wrapped)\u001b[0m\n\u001b[1;32m 318\u001b[0m class_name \u001b[38;5;241m=\u001b[39m klass\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m 319\u001b[0m check_explicit_fields_are_none(klass, inputs, outputs)\n\u001b[0;32m--> 320\u001b[0m parsed_inputs, parsed_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mextract_fields_from_class\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[43m \u001b[49m\u001b[43mShellDef\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mShellOutputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mklass\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_attribs\u001b[49m\n\u001b[1;32m 322\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 324\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(wrapped, \u001b[38;5;28mstr\u001b[39m):\n", - "File \u001b[0;32m~/git/workflows/pydra/pydra/design/base.py:347\u001b[0m, in \u001b[0;36mextract_fields_from_class\u001b[0;34m(spec_type, outputs_type, klass, arg_type, out_type, auto_attribs)\u001b[0m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fields_dict\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(klass, spec_type):\n\u001b[0;32m--> 347\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 348\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe canonical form of 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mspec_type\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m task definitions, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mklass\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, must inherit from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mspec_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 350\u001b[0m )\n\u001b[1;32m 352\u001b[0m inputs \u001b[38;5;241m=\u001b[39m get_fields(klass, arg_type, auto_attribs, input_helps)\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", - "\u001b[0;31mValueError\u001b[0m: The canonical form of specs task definitions, , must inherit from " - ] - } - ], + "outputs": [], "source": [ "import tempfile\n", "import numpy as np\n", diff --git a/new-docs/source/tutorial/7-canonical-form.ipynb b/new-docs/source/tutorial/7-canonical-form.ipynb index 0c8b3db30a..7e3ae1229d 100644 --- a/new-docs/source/tutorial/7-canonical-form.ipynb +++ b/new-docs/source/tutorial/7-canonical-form.ipynb @@ -1,248 +1,248 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Canonical task form\n", - "\n", - "Under the hood, all Python, shell and workflow task definitions generated by the\n", - "`pydra.design.*.define` decorators/functions are translated to\n", - "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", - "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", - "in the [Python-tasks](./4-python.html), [Shell-tasks](./5-shell.html) and [Workflow](./6-workflow.html)\n", - "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", - "for linters follow. Therefore, when designing task definitions to be used by third\n", - "parties (e.g. `pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", - "explicit, \"canonical\" dataclass form.\n", - "\n", - "The syntax of the canonical form is close to that used by the\n", - "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", - "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", - "form will be able to be statically type-checked by [MyPy](https://mypy-lang.org/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Python-task definitions\n", - "\n", - "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", - "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", - "called `Outputs`. The function to be executed should be a staticmethod called `function`.\n", - "Default values can also be set directly, as with Attrs classes.\n", - "\n", - "In order to allow static type-checkers to check the type of outputs of tasks added\n", - "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", - "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", - "`define` method implicitly). 
Thus the \"canonical form\" of Python task definition is as\n", - "follows" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pprint import pprint\n", - "from pydra.engine.helpers import fields_dict\n", - "from pydra.engine.specs import PythonDef, PythonOutputs\n", - "from pydra.design import python\n", - "\n", - "\n", - "@python.define\n", - "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int\n", - " b: float = 2.0 # set default value\n", - "\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b\n", - "\n", - "pprint(fields_dict(CanonicalPythonDef))\n", - "pprint(fields_dict(CanonicalPythonDef.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To set additional attributes other than the type and default, such as `allowed_values`\n", - "and `validators`, `python.arg` and `python.out` can be used instead." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import attrs.validators\n", - "\n", - "\n", - "@python.define\n", - "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", - " \"\"\"Canonical Python task definition class for testing\n", - "\n", - " Args:\n", - " a: First input\n", - " to be inputted\n", - " b: Second input\n", - " \"\"\"\n", - "\n", - " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", - " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", - "\n", - " class Outputs(PythonOutputs):\n", - " \"\"\"\n", - " Args:\n", - " c: Sum of a and b\n", - " d: Product of a and b\n", - " \"\"\"\n", - "\n", - " c: float\n", - " d: float\n", - "\n", - " @staticmethod\n", - " def function(a, b):\n", - " return a + b, a / b\n", - "\n", - "pprint(fields_dict(CanonicalPythonDef))\n", - "pprint(fields_dict(CanonicalPythonDef.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Shell-task definitions\n", - "\n", - "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", - "attribute replaces the `function` staticmethod." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from fileformats import generic\n", - "from pydra.design import shell\n", - "from pydra.engine.specs import ShellDef, ShellOutputs\n", - "from pydra.utils.typing import MultiInputObj\n", - "\n", - "\n", - "@shell.define\n", - "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", - "\n", - " executable = \"cp\"\n", - "\n", - " in_fs_objects: MultiInputObj[generic.FsObject]\n", - " recursive: bool = shell.arg(argstr=\"-R\")\n", - " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", - "\n", - " class Outputs(ShellOutputs):\n", - "\n", - " @staticmethod\n", - " def get_file_size(out_file: Path) -> int:\n", - " \"\"\"Calculate the file size\"\"\"\n", - " result = os.stat(out_file)\n", - " return result.st_size\n", - "\n", - " out_file: generic.File\n", - " out_file_size: int = shell.out(callable=get_file_size)\n", - "\n", - "\n", - "pprint(fields_dict(CpWithSize))\n", - "pprint(fields_dict(CpWithSize.Outputs))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Workflow definitions\n", - "\n", - "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", - "but with a staticmethod called `constructor` that constructs the workflow." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import python, workflow\n", - "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", - "\n", - "# Example python task definitions\n", - "@python.define\n", - "def Add(a, b):\n", - " return a + b\n", - "\n", - "\n", - "@python.define\n", - "def Mul(a, b):\n", - " return a * b\n", - "\n", - "\n", - "@workflow.define\n", - "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", - "\n", - " @staticmethod\n", - " def a_converter(value):\n", - " if value is None:\n", - " return value\n", - " return float(value)\n", - "\n", - " a: int\n", - " b: float = workflow.arg(\n", - " help=\"A float input\",\n", - " converter=a_converter,\n", - " )\n", - "\n", - " @staticmethod\n", - " def constructor(a, b):\n", - " add = workflow.add(Add(a=a, b=b))\n", - " mul = workflow.add(Mul(a=add.out, b=b))\n", - " return mul.out\n", - "\n", - " class Outputs(WorkflowOutputs):\n", - " out: float" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Canonical task form\n", + "\n", + "Under the hood, all Python, shell and workflow task definitions generated by the\n", + "`pydra.design.*.define` decorators/functions are translated to\n", + "[dataclasses](https://docs.python.org/3/library/dataclasses.html) by the\n", + "[Attrs](https://www.attrs.org/en/stable/). While the more compact syntax described\n", + "in the [Python-tasks](./4-python.html), [Shell-tasks](./5-shell.html) and [Workflow](./6-workflow.html)\n", + "tutorials is convenient when designing tasks for specific use cases, it is too magical\n", + "for linters follow. Therefore, when designing task definitions to be used by third\n", + "parties (e.g. 
`pydra-fsl`, `pydra-ants`) it is recommended to favour the, more\n", + "explicit, \"canonical\" dataclass form.\n", + "\n", + "The syntax of the canonical form is close to that used by the\n", + "[Attrs](https://www.attrs.org/en/stable/) package itself, with class type annotations\n", + "used to define the fields of the inputs and outputs of the task. Tasks defined in canonical\n", + "form will be able to be statically type-checked by [MyPy](https://mypy-lang.org/)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Python-task definitions\n", + "\n", + "Python tasks in dataclass form are decorated by `pydra.design.python.define`\n", + "with inputs listed as type annotations. Outputs are similarly defined in a nested class\n", + "called `Outputs`. The function to be executed should be a staticmethod called `function`.\n", + "Default values can also be set directly, as with Attrs classes.\n", + "\n", + "In order to allow static type-checkers to check the type of outputs of tasks added\n", + "to workflows, it is also necessary to explicitly extend from the `pydra.engine.specs.PythonDef`\n", + "and `pydra.engine.specs.PythonOutputs` classes (they are otherwise set as bases by the\n", + "`define` method implicitly). Thus the \"canonical form\" of Python task definition is as\n", + "follows" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pprint import pprint\n", + "from pydra.engine.helpers import fields_dict\n", + "from pydra.engine.specs import PythonDef, PythonOutputs\n", + "from pydra.design import python\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int\n", + " b: float = 2.0 # set default value\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To set additional attributes other than the type and default, such as `allowed_values`\n", + "and `validators`, `python.arg` and `python.out` can be used instead." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import attrs.validators\n", + "\n", + "\n", + "@python.define\n", + "class CanonicalPythonDef(PythonDef[\"CanonicalPythonDef.Outputs\"]):\n", + " \"\"\"Canonical Python task definition class for testing\n", + "\n", + " Args:\n", + " a: First input\n", + " to be inputted\n", + " b: Second input\n", + " \"\"\"\n", + "\n", + " a: int = python.arg(allowed_values=[1, 2, 3, 4, 5])\n", + " b: float = python.arg(default=2.0, validator=attrs.validators.not_(0))\n", + "\n", + " class Outputs(PythonOutputs):\n", + " \"\"\"\n", + " Args:\n", + " c: Sum of a and b\n", + " d: Product of a and b\n", + " \"\"\"\n", + "\n", + " c: float\n", + " d: float\n", + "\n", + " @staticmethod\n", + " def function(a, b):\n", + " return a + b, a / b\n", + "\n", + "pprint(fields_dict(CanonicalPythonDef))\n", + "pprint(fields_dict(CanonicalPythonDef.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Shell-task definitions\n", + "\n", + "The canonical form of shell tasks is the same as for Python tasks, except a string `executable`\n", + "attribute replaces the `function` staticmethod." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from fileformats import generic\n", + "from pydra.design import shell\n", + "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "\n", + "\n", + "@shell.define\n", + "class CpWithSize(ShellDef[\"CpWithSize.Outputs\"]):\n", + "\n", + " executable = \"cp\"\n", + "\n", + " in_fs_objects: MultiInputObj[generic.FsObject]\n", + " recursive: bool = shell.arg(argstr=\"-R\")\n", + " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + "\n", + " class Outputs(ShellOutputs):\n", + "\n", + " @staticmethod\n", + " def get_file_size(out_file: Path) -> int:\n", + " \"\"\"Calculate the file size\"\"\"\n", + " result = os.stat(out_file)\n", + " return result.st_size\n", + "\n", + " out_file: generic.File\n", + " out_file_size: int = shell.out(callable=get_file_size)\n", + "\n", + "\n", + "pprint(fields_dict(CpWithSize))\n", + "pprint(fields_dict(CpWithSize.Outputs))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Workflow definitions\n", + "\n", + "Workflows can also be defined in canonical form, which is the same as for Python tasks\n", + "but with a staticmethod called `constructor` that constructs the workflow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import python, workflow\n", + "from pydra.engine.specs import WorkflowDef, WorkflowOutputs\n", + "\n", + "# Example python task definitions\n", + "@python.define\n", + "def Add(a, b):\n", + " return a + b\n", + "\n", + "\n", + "@python.define\n", + "def Mul(a, b):\n", + " return a * b\n", + "\n", + "\n", + "@workflow.define\n", + "class CanonicalWorkflowDef(WorkflowDef[\"CanonicalWorkflowDef.Outputs\"]):\n", + "\n", + " @staticmethod\n", + " def a_converter(value):\n", + " if value is None:\n", + " return value\n", + " return float(value)\n", + "\n", + " a: int\n", + " b: float = workflow.arg(\n", + " help=\"A float input\",\n", + " converter=a_converter,\n", + " )\n", + "\n", + " @staticmethod\n", + " def constructor(a, b):\n", + " add = workflow.add(Add(a=a, b=b))\n", + " mul = workflow.add(Mul(a=add.out, b=b))\n", + " return mul.out\n", + "\n", + " class Outputs(WorkflowOutputs):\n", + " out: float" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } From 33ef3d4f746bf5466ca88ed9dbd7d23e6ff4593c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 18:47:34 +1100 Subject: [PATCH 181/342] updated upload/download artifact versions to v4 --- .github/workflows/docs.yml | 8 ++++---- new-docs/source/tutorial/3-troubleshooting.ipynb | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 741cc868ba..72cbceca79 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -39,7 +39,7 @@ jobs: cd docs make html cd .. - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: docs path: docs/_build/html @@ -69,7 +69,7 @@ jobs: cd new-docs make html cd .. 
- - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: new-docs path: new-docs/build/html @@ -79,12 +79,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Download docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: docs path: docs-build - name: Download new docs - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: new-docs path: docs-build/new diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 17b4307952..0edd4b8411 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -191,12 +191,14 @@ "metadata": {}, "outputs": [], "source": [ + "from pydra.engine.submitter import Submitter\n", "from pydra.tasks.testing import SafeDivisionWorkflow\n", "\n", "wf = SafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", - "with Submitter(worker=\"cf\") as sub:\n", - " result = sub(wf)\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", " \n", "print(f\"Workflow completed successfully, results saved in: {result.output_dir}\")" ] From d7f1b0b2713a842fedb474c2212291fcd3f44123 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 18:50:56 +1100 Subject: [PATCH 182/342] added python kernel install to docs build --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 72cbceca79..fe11e29b3a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -64,6 +64,8 @@ jobs: pip install build twine - name: Install package run: pip install .[doc] + - name: Install Python3 kernel + run: python -m ipykernel install --user - name: Build new docs run: | cd new-docs From bc4b3737cb0542e8dc11d6ad2f936d8d598b1f99 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 18:53:48 +1100 Subject: [PATCH 183/342] added ipykernel to docs deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 549b88a5af..342bd7ac42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ doc = [ "sphinx-argparse", "nbsphinx", "ipython", + "ipykernel", "nibabel", "nilearn", "pandas", From e1eae66f9966fe900369f78a463565a27ec16f32 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:26:20 +1100 Subject: [PATCH 184/342] updated print_help --- pydra/engine/helpers.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 5289ced5c9..bd75da3569 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -78,24 +78,26 @@ def from_list_if_single(obj: ty.Any) -> ty.Any: return obj -def print_help(obj): +def print_help(defn: "TaskDef[DefType]") -> list[str]: """Visit a task object and print its input/output interface.""" - lines = [f"Help for {obj.__class__.__name__}"] - if attrs.fields(obj.interface): + from pydra.design.base import EMPTY + + lines = [f"Help for {defn.__class__.__name__}"] + if list_fields(defn): lines += ["Input Parameters:"] - for f in attrs.fields(obj.interface): + for f in list_fields(defn): default = "" - if f.default != attrs.NOTHING and not f.name.startswith("_"): + if f.default is not EMPTY and not f.name.startswith("_"): default = f" (default: {f.default})" try: name = f.type.__name__ except AttributeError: name = str(f.type) lines += 
[f"- {f.name}: {name}{default}"] - output_klass = obj.interface.Outputs - if attrs.fields(output_klass): + output_klass = defn.Outputs + if list_fields(output_klass): lines += ["Output Parameters:"] - for f in attrs.fields(output_klass): + for f in list_fields(output_klass): try: name = f.type.__name__ except AttributeError: From 6bb494a662a4ce257db11bfa50934e15b20afef9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:26:59 +1100 Subject: [PATCH 185/342] added fileformats-medimage to docs deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 342bd7ac42..4f55cb8225 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ doc = [ "pandoc", "numpy", "scipy", + "fileformats-medimage", # "pydra-mrtrix3", "sphinx_rtd_theme", "sphinx-click", From fe41b999b5d3de4a7d8ceebb4dec1e3a71a79777 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:27:09 +1100 Subject: [PATCH 186/342] debugging unittests after syntax changes --- pydra/engine/tests/test_helpers.py | 120 ++++++++------------------ pydra/engine/tests/test_task.py | 132 ++++++++++++++--------------- 2 files changed, 100 insertions(+), 152 deletions(-) diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index b71542beb1..d1ef760612 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -6,12 +6,11 @@ import typing as ty import pytest import cloudpickle as cp -from unittest.mock import Mock from pydra.engine.submitter import Submitter from pydra.engine.specs import Result from pydra.engine.core import Task +from pydra.design import workflow from fileformats.generic import Directory, File -from fileformats.core import FileSet from .utils import Multiply, RaiseXeq1 from ..helpers import ( get_available_cpus, @@ -179,47 +178,39 @@ def test_get_available_cpus(): def test_load_and_run(tmpdir): """testing load_and_run for pickled task""" task_pkl = Path(tmpdir.join("task_main.pkl")) - - task = Multiply(name="mult", y=10).split(x=[1, 2]) - task.state.prepare_states(inputs=task.inputs) - task.state.prepare_inputs() + # Note that tasks now don't have state arrays and indices, just a single resolved + # set of parameters that are ready to run + task = Task(name="mult", definition=Multiply(x=2, y=10), submitter=Submitter()) with task_pkl.open("wb") as fp: cp.dump(task, fp) - - resultfile_0 = load_and_run(task_pkl=task_pkl, ind=0) - resultfile_1 = load_and_run(task_pkl=task_pkl, ind=1) + resultfile = load_and_run(task_pkl=task_pkl) # checking the result files - result_0 = cp.loads(resultfile_0.read_bytes()) - result_1 = cp.loads(resultfile_1.read_bytes()) - assert result_0.output.out == 10 - assert result_1.output.out == 20 - - -def test_load_and_run_exception_load(tmpdir): - """testing raising exception and saving info in crashfile when when load_and_run""" - task_pkl = Path(tmpdir.join("task_main.pkl")) - RaiseXeq1(name="raise").split("x", x=[1, 2]) - with pytest.raises(FileNotFoundError): - load_and_run(task_pkl=task_pkl, ind=0) + result = cp.loads(resultfile.read_bytes()) + assert result.outputs.out == 20 def test_load_and_run_exception_run(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) + cache_root = Path(tmpdir.join("cache")) + cache_root.mkdir() - task = RaiseXeq1(name="raise").split("x", x=[1, 2]) - task.state.prepare_states(inputs=task.inputs) - task.state.prepare_inputs() + task = Task( + 
definition=RaiseXeq1(x=1), + name="raise", + submitter=Submitter(worker="cf", cache_dir=cache_root), + ) with task_pkl.open("wb") as fp: cp.dump(task, fp) with pytest.raises(Exception) as excinfo: - load_and_run(task_pkl=task_pkl, ind=0) - assert "i'm raising an exception!" in str(excinfo.value) + load_and_run(task_pkl=task_pkl) + exc_msg = excinfo.value.args[0] + assert "i'm raising an exception!" in exc_msg # checking if the crashfile has been created - assert "crash" in str(excinfo.value) - errorfile = Path(str(excinfo.value).split("here: ")[1][:-2]) + assert "crash" in exc_msg + errorfile = Path(exc_msg.split("here: ")[1][:-2]) assert errorfile.exists() resultfile = errorfile.parent / "_result.pklz" @@ -228,37 +219,35 @@ def test_load_and_run_exception_run(tmpdir): result_exception = cp.loads(resultfile.read_bytes()) assert result_exception.errored is True + task = Task(definition=RaiseXeq1(x=2), name="wont_raise", submitter=Submitter()) + + with task_pkl.open("wb") as fp: + cp.dump(task, fp) + # the second task should be fine - resultfile = load_and_run(task_pkl=task_pkl, ind=1) + resultfile = load_and_run(task_pkl=task_pkl) result_1 = cp.loads(resultfile.read_bytes()) - assert result_1.output.out == 2 + assert result_1.outputs.out == 2 def test_load_and_run_wf(tmpdir): """testing load_and_run for pickled task""" wf_pkl = Path(tmpdir.join("wf_main.pkl")) - wf = Workflow(name="wf", input_spec=["x", "y"], y=10) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.split("x", x=[1, 2]) - - wf.set_output([("out", wf.mult.lzout.out)]) + @workflow.define + def Workflow(x, y=10): + multiply = workflow.add(Multiply(x=x, y=y)) + return multiply.out - # task = multiply(name="mult", x=[1, 2], y=10).split("x") - wf.state.prepare_states(inputs=wf.inputs) - wf.state.prepare_inputs() - wf.plugin = "cf" + task = Task(name="mult", definition=Workflow(x=2), submitter=Submitter(worker="cf")) with wf_pkl.open("wb") as fp: - cp.dump(wf, fp) + cp.dump(task, fp) - resultfile_0 = load_and_run(ind=0, task_pkl=wf_pkl) - resultfile_1 = load_and_run(ind=1, task_pkl=wf_pkl) + resultfile = load_and_run(task_pkl=wf_pkl) # checking the result files - result_0 = cp.loads(resultfile_0.read_bytes()) - result_1 = cp.loads(resultfile_1.read_bytes()) - assert result_0.output.out == 10 - assert result_1.output.out == 20 + result = cp.loads(resultfile.read_bytes()) + assert result.outputs.out == 20 @pytest.mark.parametrize( @@ -276,45 +265,6 @@ def test_position_sort(pos_args): assert final_args == ["a", "b", "c"] -def test_parse_copyfile(): - Mode = FileSet.CopyMode - Collation = FileSet.CopyCollation - - def mock_field(copyfile): - mock = Mock(["metadata"]) - mock.metadata = {"copyfile": copyfile} - return mock - - assert parse_copyfile(mock_field((Mode.any, Collation.any))) == ( - Mode.any, - Collation.any, - ) - assert parse_copyfile(mock_field("copy"), default_collation=Collation.siblings) == ( - Mode.copy, - Collation.siblings, - ) - assert parse_copyfile(mock_field("link,adjacent")) == ( - Mode.link, - Collation.adjacent, - ) - assert parse_copyfile(mock_field(True)) == ( - Mode.copy, - Collation.any, - ) - assert parse_copyfile(mock_field(False)) == ( - Mode.link, - Collation.any, - ) - assert parse_copyfile(mock_field(None)) == ( - Mode.any, - Collation.any, - ) - with pytest.raises(TypeError, match="Unrecognised type for mode copyfile"): - parse_copyfile(mock_field((1, 2))) - with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"): - parse_copyfile(mock_field((Mode.copy, 2))) - - 
def test_parse_format_string1(): assert parse_format_string("{a}") == {"a"} diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 18a11f30c6..696a73de84 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -11,7 +11,9 @@ from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages from ..task import AuditFlag, ShellTask from pydra.engine.specs import argstr_formatting +from pydra.engine.helpers import list_fields, print_help from .utils import BasicWorkflow +from pydra.utils import default_run_cache_dir from pydra.utils.typing import ( MultiInputObj, MultiOutputObj, @@ -68,30 +70,27 @@ def testfunc(a: int, b: float = 0.1) -> float: return a + b funky = testfunc(a=1) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 1 - assert getattr(funky.inputs, "b") == 0.1 - assert getattr(funky.inputs, "_func") is not None - assert set(funky.output_names) == {"out_out"} - # assert funky.inputs.hash == '17772c3aec9540a8dd3e187eecd2301a09c9a25c6e371ddd86e31e3a1ecfeefa' - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "function") + assert getattr(funky, "a") == 1 + assert getattr(funky, "b") == 0.1 + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out_out"} outputs = funky() assert hasattr(outputs, "out_out") assert outputs.out_out == 1.1 - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - funky.result() # should not recompute - funky.inputs.a = 2 - # assert funky.checksum == '537d25885fd2ea5662b7701ba02c132c52a9078a3a2d56aa903a777ea90e5536' - assert funky.result() is None - funky() - result = funky.result() + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) + funky() # should not recompute + funky.a = 2 + outputs = funky() assert outputs.out_out == 2.1 - help = funky.help(returnhelp=True) + help = print_help(funky) assert help == [ "Help for PythonTask", "Input Parameters:", @@ -106,8 +105,7 @@ def testfunc(a: int, b: float = 0.1) -> float: def test_annotated_func_dictreturn(): """Test mapping from returned dictionary to output definition.""" - @python.define - @mark.annotate({"return": {"sum": int, "mul": ty.Optional[int]}}) + @python.define(outputs={"sum": int, "mul": ty.Optional[int]}) def testfunc(a: int, b: int): return dict(sum=a + b, diff=a - b) @@ -136,12 +134,12 @@ def testfunc( return math.modf(a)[0], int(math.modf(a)[1]) funky = testfunc(a=3.5) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 3.5 - assert getattr(funky.inputs, "_func") is not None + assert hasattr(funky, "a") + assert hasattr(funky, "_func") + assert getattr(funky, "a") == 3.5 + assert getattr(funky, "_func") is not None assert set(funky.output_names) == {"fractional", "integer"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum + assert funky.__class__.__name__ + "_" + funky.hash == funky.checksum outputs = funky() assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") @@ -171,7 +169,7 @@ def testfunc(a: float): return a funky = testfunc(a=3.5) - assert getattr(funky.inputs, "a") == 3.5 + assert getattr(funky, "a") == 3.5 def test_annotated_input_func_2(): @@ -194,7 +192,7 @@ def testfunc(a: int): 
funky = testfunc() with pytest.raises(TypeError): - funky.inputs.a = 3.5 + funky.a = 3.5 def test_annotated_input_func_3(): @@ -205,7 +203,7 @@ def testfunc(a: list): return sum(a) funky = testfunc(a=[1, 3.5]) - assert getattr(funky.inputs, "a") == [1, 3.5] + assert getattr(funky, "a") == [1, 3.5] def test_annotated_input_func_3a(): @@ -216,7 +214,7 @@ def testfunc(a: ty.List[float]): return sum(a) funky = testfunc(a=[1.0, 3.5]) - assert getattr(funky.inputs, "a") == [1.0, 3.5] + assert getattr(funky, "a") == [1.0, 3.5] def test_annotated_input_func_3b(): @@ -229,7 +227,7 @@ def testfunc(a: ty.List[float]): return sum(a) funky = testfunc(a=[1, 3.5]) - assert getattr(funky.inputs, "a") == [1, 3.5] + assert getattr(funky, "a") == [1, 3.5] def test_annotated_input_func_3c_excep(): @@ -253,7 +251,7 @@ def testfunc(a: dict): return sum(a.values()) funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_4a(): @@ -264,7 +262,7 @@ def testfunc(a: ty.Dict[str, float]): return sum(a.values()) funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_4b_excep(): @@ -289,7 +287,7 @@ def testfunc(a: ty.Dict[str, ty.List]): return sum(a["el1"]) funky = testfunc(a={"el1": [1, 3.5]}) - assert getattr(funky.inputs, "a") == {"el1": [1, 3.5]} + assert getattr(funky, "a") == {"el1": [1, 3.5]} def test_annotated_input_func_5a_except(): @@ -315,7 +313,7 @@ def testfunc(a: ty.Dict[str, ty.Union[float, int]]): return sum(a["el1"]) funky = testfunc(a={"el1": 1, "el2": 3.5}) - assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} + assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} def test_annotated_input_func_6a_excep(): @@ -342,7 +340,7 @@ def testfunc(a: float): return a funky = testfunc().split("a", a=[3.5, 2.1]) - assert getattr(funky.inputs, "a") == [3.5, 2.1] + assert getattr(funky, "a") == [3.5, 2.1] def test_annotated_input_func_7a_excep(): @@ -368,7 +366,7 @@ def testfunc(a: MultiInputObj): return len(a) funky = testfunc(a=3.5) - assert getattr(funky.inputs, "a") == [3.5] + assert getattr(funky, "a") == [3.5] res = funky() assert res.output.out == 1 @@ -383,7 +381,7 @@ def testfunc(a: MultiInputObj): return len(a) funky = testfunc(a=[3.5]) - assert getattr(funky.inputs, "a") == [3.5] + assert getattr(funky, "a") == [3.5] res = funky() assert res.output.out == 1 @@ -400,8 +398,8 @@ def testfunc(a: MultiInputObj): funky = testfunc() # setting a after init - funky.inputs.a = 3.5 - assert getattr(funky.inputs, "a") == [3.5] + funky.a = 3.5 + assert getattr(funky, "a") == [3.5] res = funky() assert res.output.out == 1 @@ -433,14 +431,14 @@ def testfunc(a, b) -> int: return a + b funky = testfunc(a=10, b=20) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 10 - assert getattr(funky.inputs, "b") == 20 - assert getattr(funky.inputs, "_func") is not None + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "_func") + assert getattr(funky, "a") == 10 + assert getattr(funky, "b") == 20 + assert getattr(funky, "_func") is not None assert set(funky.output_names) == {"out"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum + assert funky.__class__.__name__ + "_" + funky.hash == funky.checksum 
outputs = funky() assert hasattr(result, "output") @@ -450,7 +448,7 @@ def testfunc(a, b) -> int: assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") funky.result() # should not recompute - funky.inputs.a = 11 + funky.a = 11 assert funky.result() is None funky() result = funky.result() @@ -474,14 +472,14 @@ def testfunc(a, b) -> (int, int): return a + 1, b + 1 funky = testfunc(a=10, b=20) - assert hasattr(funky.inputs, "a") - assert hasattr(funky.inputs, "b") - assert hasattr(funky.inputs, "_func") - assert getattr(funky.inputs, "a") == 10 - assert getattr(funky.inputs, "b") == 20 - assert getattr(funky.inputs, "_func") is not None + assert hasattr(funky, "a") + assert hasattr(funky, "b") + assert hasattr(funky, "_func") + assert getattr(funky, "a") == 10 + assert getattr(funky, "b") == 20 + assert getattr(funky, "_func") is not None assert set(funky.output_names) == {"out1", "out2"} - assert funky.__class__.__name__ + "_" + funky.inputs.hash == funky.checksum + assert funky.__class__.__name__ + "_" + funky.hash == funky.checksum outputs = funky() assert hasattr(result, "output") @@ -491,7 +489,7 @@ def testfunc(a, b) -> (int, int): assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") funky.result() # should not recompute - funky.inputs.a = 11 + funky.a = 11 assert funky.result() is None funky() result = funky.result() @@ -516,9 +514,9 @@ def no_annots(c, d): return c + d natask = no_annots(c=17, d=3.2) - assert hasattr(natask.inputs, "c") - assert hasattr(natask.inputs, "d") - assert hasattr(natask.inputs, "_func") + assert hasattr(natask, "c") + assert hasattr(natask, "d") + assert hasattr(natask, "_func") result = natask._run() assert hasattr(result, "output") @@ -561,9 +559,9 @@ def no_annots(c, d): return c + d, c - d natask = no_annots(c=17, d=3.2) - assert hasattr(natask.inputs, "c") - assert hasattr(natask.inputs, "d") - assert hasattr(natask.inputs, "_func") + assert hasattr(natask, "c") + assert hasattr(natask, "d") + assert hasattr(natask, "_func") result = natask._run() assert hasattr(result, "output") @@ -585,7 +583,7 @@ def testfunc(a): ) funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + assert getattr(funky, "a") == 3.5 def test_input_spec_func_1a_except(): @@ -660,7 +658,7 @@ def testfunc(a: int): ) funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + assert getattr(funky, "a") == 3.5 def test_input_spec_func_2a(): @@ -680,7 +678,7 @@ def testfunc(a: int): ) funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 3.5 + assert getattr(funky, "a") == 3.5 def test_input_spec_func_3(): @@ -707,7 +705,7 @@ def testfunc(a): ) funky = testfunc(a=2, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == 2 + assert getattr(funky, "a") == 2 def test_input_spec_func_3a_except(): @@ -808,7 +806,7 @@ def testfunc(a): ) funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == MultiInputObj([3.5]) + assert getattr(funky, "a") == MultiInputObj([3.5]) res = funky() assert res.output.out == 1 @@ -1254,7 +1252,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] ) wf.add(testfunc(name="testfunc", a=wf.lzin.x)) wf.set_output([("out", wf.testfunc.lzout.out)]) - wf.inputs.x = 2 + wf.x = 2 wf(plugin="cf") # default path @@ -1351,7 +1349,7 @@ def myhook(task, *args): foo.hooks.post_run = myhook foo.hooks.pre_run_task = myhook foo.hooks.post_run_task = myhook - 
foo.inputs.a = 2 # ensure not pre-cached + foo.a = 2 # ensure not pre-cached foo() captured = capsys.readouterr() assert captured.out.count("I was called\n") == 4 From 8906e8f3fb106a26078e4928ea38b91d3c585522 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:38:38 +1100 Subject: [PATCH 187/342] debugged test_helpers --- pydra/engine/tests/test_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index d1ef760612..2e183c4e97 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -209,8 +209,8 @@ def test_load_and_run_exception_run(tmpdir): exc_msg = excinfo.value.args[0] assert "i'm raising an exception!" in exc_msg # checking if the crashfile has been created - assert "crash" in exc_msg - errorfile = Path(exc_msg.split("here: ")[1][:-2]) + assert "crash" in excinfo.value.__notes__[0] + errorfile = Path(excinfo.value.__notes__[0].split("here: ")[1]) assert errorfile.exists() resultfile = errorfile.parent / "_result.pklz" From d761613b9b9d5361a741702c6af03e2c9f878982 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:47:22 +1100 Subject: [PATCH 188/342] cleaned up print_help --- pydra/engine/helpers.py | 4 ++++ pydra/engine/tests/test_task.py | 32 +++++++++++++++----------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index bd75da3569..5b89f6a31c 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -86,6 +86,10 @@ def print_help(defn: "TaskDef[DefType]") -> list[str]: if list_fields(defn): lines += ["Input Parameters:"] for f in list_fields(defn): + if (defn._task_type == "python" and f.name == "function") or ( + defn._task_type == "workflow" and f.name == "constructor" + ): + continue default = "" if f.default is not EMPTY and not f.name.startswith("_"): default = f" (default: {f.default})" diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 696a73de84..12ad079c9b 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -66,10 +66,10 @@ def test_checksum(): def test_annotated_func(): @python.define(outputs=["out_out"]) - def testfunc(a: int, b: float = 0.1) -> float: + def TestFunc(a: int, b: float = 0.1) -> float: return a + b - funky = testfunc(a=1) + funky = TestFunc(a=1) assert hasattr(funky, "a") assert hasattr(funky, "b") assert hasattr(funky, "function") @@ -92,11 +92,10 @@ def testfunc(a: int, b: float = 0.1) -> float: help = print_help(funky) assert help == [ - "Help for PythonTask", + "Help for TestFunc", "Input Parameters:", "- a: int", "- b: float (default: 0.1)", - "- _func: bytes", "Output Parameters:", "- out_out: float", ] @@ -125,36 +124,35 @@ def testfunc(a: int, b: int): def test_annotated_func_multreturn(): """the function has two elements in the return statement""" - @python.define - def testfunc( + @python.define(outputs={"fractional": float, "integer": int}) + def TestFunc( a: float, - ) -> ty.NamedTuple("Output", [("fractional", float), ("integer", int)]): + ): import math return math.modf(a)[0], int(math.modf(a)[1]) - funky = testfunc(a=3.5) + funky = TestFunc(a=3.5) assert hasattr(funky, "a") - assert hasattr(funky, "_func") + assert hasattr(funky, "function") assert getattr(funky, "a") == 3.5 - assert getattr(funky, "_func") is not None - assert set(funky.output_names) == {"fractional", "integer"} - assert funky.__class__.__name__ + "_" + 
funky.hash == funky.checksum + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"fractional", "integer"} outputs = funky() - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") - assert hasattr(result, "output") + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) assert hasattr(outputs, "fractional") assert outputs.fractional == 0.5 assert hasattr(outputs, "integer") assert outputs.integer == 3 - help = funky.help(returnhelp=True) + help = print_help(funky) assert help == [ - "Help for PythonTask", + "Help for TestFunc", "Input Parameters:", "- a: float", - "- _func: bytes", "Output Parameters:", "- fractional: float", "- integer: int", From 92cf3785176c7d96a2a28a2420fb3106bfe846f0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Feb 2025 21:53:46 +1100 Subject: [PATCH 189/342] debugging task tests --- pydra/engine/tests/test_task.py | 310 ++++++++++++++++---------------- 1 file changed, 153 insertions(+), 157 deletions(-) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 12ad079c9b..95dc0c3ccf 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -105,10 +105,10 @@ def test_annotated_func_dictreturn(): """Test mapping from returned dictionary to output definition.""" @python.define(outputs={"sum": int, "mul": ty.Optional[int]}) - def testfunc(a: int, b: int): + def TestFunc(a: int, b: int): return dict(sum=a + b, diff=a - b) - task = testfunc(a=2, b=3) + task = TestFunc(a=2, b=3) outputs = task() # Part of the annotation and returned, should be exposed to output. @@ -163,10 +163,10 @@ def test_annotated_input_func_1(): """the function with annotated input (float)""" @python.define - def testfunc(a: float): + def TestFunc(a: float): return a - funky = testfunc(a=3.5) + funky = TestFunc(a=3.5) assert getattr(funky, "a") == 3.5 @@ -174,21 +174,21 @@ def test_annotated_input_func_2(): """the function with annotated input (int, but float provided)""" @python.define - def testfunc(a: int): + def TestFunc(a: int): return a with pytest.raises(TypeError): - testfunc(a=3.5) + TestFunc(a=3.5) def test_annotated_input_func_2a(): """the function with annotated input (int, but float provided)""" @python.define - def testfunc(a: int): + def TestFunc(a: int): return a - funky = testfunc() + funky = TestFunc() with pytest.raises(TypeError): funky.a = 3.5 @@ -197,10 +197,10 @@ def test_annotated_input_func_3(): """the function with annotated input (list)""" @python.define - def testfunc(a: list): + def TestFunc(a: list): return sum(a) - funky = testfunc(a=[1, 3.5]) + funky = TestFunc(a=[1, 3.5]) assert getattr(funky, "a") == [1, 3.5] @@ -208,10 +208,10 @@ def test_annotated_input_func_3a(): """the function with annotated input (list of floats)""" @python.define - def testfunc(a: ty.List[float]): + def TestFunc(a: ty.List[float]): return sum(a) - funky = testfunc(a=[1.0, 3.5]) + funky = TestFunc(a=[1.0, 3.5]) assert getattr(funky, "a") == [1.0, 3.5] @@ -221,10 +221,10 @@ def test_annotated_input_func_3b(): """ @python.define - def testfunc(a: ty.List[float]): + def TestFunc(a: ty.List[float]): return sum(a) - funky = testfunc(a=[1, 3.5]) + funky = TestFunc(a=[1, 3.5]) assert getattr(funky, "a") == [1, 3.5] @@ -234,21 +234,21 @@ def test_annotated_input_func_3c_excep(): """ @python.define - def testfunc(a: ty.List[int]): + def TestFunc(a: ty.List[int]): return sum(a) with pytest.raises(TypeError): - testfunc(a=[1, 
3.5]) + TestFunc(a=[1, 3.5]) def test_annotated_input_func_4(): """the function with annotated input (dictionary)""" @python.define - def testfunc(a: dict): + def TestFunc(a: dict): return sum(a.values()) - funky = testfunc(a={"el1": 1, "el2": 3.5}) + funky = TestFunc(a={"el1": 1, "el2": 3.5}) assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} @@ -256,10 +256,10 @@ def test_annotated_input_func_4a(): """the function with annotated input (dictionary of floats)""" @python.define - def testfunc(a: ty.Dict[str, float]): + def TestFunc(a: ty.Dict[str, float]): return sum(a.values()) - funky = testfunc(a={"el1": 1, "el2": 3.5}) + funky = TestFunc(a={"el1": 1, "el2": 3.5}) assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} @@ -267,11 +267,11 @@ def test_annotated_input_func_4b_excep(): """the function with annotated input (dictionary of ints, but float provided)""" @python.define - def testfunc(a: ty.Dict[str, int]): + def TestFunc(a: ty.Dict[str, int]): return sum(a.values()) with pytest.raises(TypeError): - testfunc(a={"el1": 1, "el2": 3.5}) + TestFunc(a={"el1": 1, "el2": 3.5}) def test_annotated_input_func_5(): @@ -281,10 +281,10 @@ def test_annotated_input_func_5(): """ @python.define - def testfunc(a: ty.Dict[str, ty.List]): + def TestFunc(a: ty.Dict[str, ty.List]): return sum(a["el1"]) - funky = testfunc(a={"el1": [1, 3.5]}) + funky = TestFunc(a={"el1": [1, 3.5]}) assert getattr(funky, "a") == {"el1": [1, 3.5]} @@ -294,11 +294,11 @@ def test_annotated_input_func_5a_except(): """ @python.define - def testfunc(a: ty.Dict[str, ty.Dict[str, float]]): + def TestFunc(a: ty.Dict[str, ty.Dict[str, float]]): return sum(a["el1"]) with pytest.raises(TypeError): - testfunc(a={"el1": [1, 3.5]}) + TestFunc(a={"el1": [1, 3.5]}) def test_annotated_input_func_6(): @@ -307,10 +307,10 @@ def test_annotated_input_func_6(): """ @python.define - def testfunc(a: ty.Dict[str, ty.Union[float, int]]): + def TestFunc(a: ty.Dict[str, ty.Union[float, int]]): return sum(a["el1"]) - funky = testfunc(a={"el1": 1, "el2": 3.5}) + funky = TestFunc(a={"el1": 1, "el2": 3.5}) assert getattr(funky, "a") == {"el1": 1, "el2": 3.5} @@ -320,11 +320,11 @@ def test_annotated_input_func_6a_excep(): """ @python.define - def testfunc(a: ty.Dict[str, ty.Union[str, int]]): + def TestFunc(a: ty.Dict[str, ty.Union[str, int]]): return sum(a["el1"]) with pytest.raises(TypeError): - testfunc(a={"el1": 1, "el2": 3.5}) + TestFunc(a={"el1": 1, "el2": 3.5}) def test_annotated_input_func_7(): @@ -334,10 +334,10 @@ def test_annotated_input_func_7(): """ @python.define - def testfunc(a: float): + def TestFunc(a: float): return a - funky = testfunc().split("a", a=[3.5, 2.1]) + funky = TestFunc().split("a", a=[3.5, 2.1]) assert getattr(funky, "a") == [3.5, 2.1] @@ -347,11 +347,11 @@ def test_annotated_input_func_7a_excep(): """ @python.define - def testfunc(a: int): + def TestFunc(a: int): return a with pytest.raises(TypeError): - testfunc(a=[3.5, 2.1]).split("a") + TestFunc(a=[3.5, 2.1]).split("a") def test_annotated_input_func_8(): @@ -360,13 +360,13 @@ def test_annotated_input_func_8(): """ @python.define - def testfunc(a: MultiInputObj): + def TestFunc(a: MultiInputObj): return len(a) - funky = testfunc(a=3.5) + funky = TestFunc(a=3.5) assert getattr(funky, "a") == [3.5] - res = funky() - assert res.output.out == 1 + outputs = funky() + assert outputs.out == 1 def test_annotated_input_func_8a(): @@ -375,13 +375,13 @@ def test_annotated_input_func_8a(): """ @python.define - def testfunc(a: MultiInputObj): + def TestFunc(a: MultiInputObj): return 
len(a) - funky = testfunc(a=[3.5]) + funky = TestFunc(a=[3.5]) assert getattr(funky, "a") == [3.5] - res = funky() - assert res.output.out == 1 + outputs = funky() + assert outputs.out == 1 def test_annotated_input_func_8b(): @@ -391,15 +391,15 @@ def test_annotated_input_func_8b(): """ @python.define - def testfunc(a: MultiInputObj): + def TestFunc(a: MultiInputObj): return len(a) - funky = testfunc() + funky = TestFunc() # setting a after init funky.a = 3.5 assert getattr(funky, "a") == [3.5] - res = funky() - assert res.output.out == 1 + outputs = funky() + assert outputs.out == 1 def test_annotated_func_multreturn_exception(): @@ -408,7 +408,7 @@ def test_annotated_func_multreturn_exception(): """ @python.define - def testfunc( + def TestFunc( a: float, ) -> ty.NamedTuple( "Output", [("fractional", float), ("integer", int), ("who_knows", int)] @@ -417,7 +417,7 @@ def testfunc( return math.modf(a) - funky = testfunc(a=3.5) + funky = TestFunc(a=3.5) with pytest.raises(Exception) as excinfo: funky() assert "expected 3 elements" in str(excinfo.value) @@ -425,40 +425,38 @@ def testfunc( def test_halfannotated_func(): @python.define - def testfunc(a, b) -> int: + def TestFunc(a, b) -> int: return a + b - funky = testfunc(a=10, b=20) + funky = TestFunc(a=10, b=20) assert hasattr(funky, "a") assert hasattr(funky, "b") - assert hasattr(funky, "_func") + assert hasattr(funky, "function") assert getattr(funky, "a") == 10 assert getattr(funky, "b") == 20 - assert getattr(funky, "_func") is not None - assert set(funky.output_names) == {"out"} - assert funky.__class__.__name__ + "_" + funky.hash == funky.checksum + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out"} outputs = funky() - assert hasattr(result, "output") assert hasattr(outputs, "out") assert outputs.out == 30 - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) funky.result() # should not recompute funky.a = 11 assert funky.result() is None - funky() - result = funky.result() + outputs = funky() assert outputs.out == 31 help = funky.help(returnhelp=True) assert help == [ - "Help for PythonTask", + "Help for TestFunc", "Input Parameters:", "- a: _empty", "- b: _empty", - "- _func: bytes", "Output Parameters:", "- out: int", ] @@ -466,40 +464,38 @@ def testfunc(a, b) -> int: def test_halfannotated_func_multreturn(): @python.define - def testfunc(a, b) -> (int, int): + def TestFunc(a, b) -> (int, int): return a + 1, b + 1 - funky = testfunc(a=10, b=20) + funky = TestFunc(a=10, b=20) assert hasattr(funky, "a") assert hasattr(funky, "b") - assert hasattr(funky, "_func") + assert hasattr(funky, "function") assert getattr(funky, "a") == 10 assert getattr(funky, "b") == 20 - assert getattr(funky, "_func") is not None - assert set(funky.output_names) == {"out1", "out2"} - assert funky.__class__.__name__ + "_" + funky.hash == funky.checksum + assert getattr(funky, "function") is not None + assert set(f.name for f in list_fields(funky.Outputs)) == {"out1", "out2"} outputs = funky() - assert hasattr(result, "output") assert hasattr(outputs, "out1") assert outputs.out1 == 11 - assert os.path.exists(funky.cache_dir / funky.checksum / "_result.pklz") + assert os.path.exists( + default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" + ) funky.result() # should not recompute funky.a = 11 assert funky.result() is None - funky() - result = funky.result() + outputs = 
funky() assert outputs.out1 == 12 help = funky.help(returnhelp=True) assert help == [ - "Help for PythonTask", + "Help for TestFunc", "Input Parameters:", "- a: _empty", "- b: _empty", - "- _func: bytes", "Output Parameters:", "- out1: int", "- out2: int", @@ -514,7 +510,7 @@ def no_annots(c, d): natask = no_annots(c=17, d=3.2) assert hasattr(natask, "c") assert hasattr(natask, "d") - assert hasattr(natask, "_func") + assert hasattr(natask, "function") result = natask._run() assert hasattr(result, "output") @@ -559,7 +555,7 @@ def no_annots(c, d): natask = no_annots(c=17, d=3.2) assert hasattr(natask, "c") assert hasattr(natask, "d") - assert hasattr(natask, "_func") + assert hasattr(natask, "function") result = natask._run() assert hasattr(result, "output") @@ -571,7 +567,7 @@ def test_input_spec_func_1(): """the function w/o annotated, but input_spec is used""" @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo( @@ -580,7 +576,7 @@ def testfunc(a): bases=(FunctionDef,), ) - funky = testfunc(a=3.5, input_spec=my_input_spec) + funky = TestFunc(a=3.5, input_spec=my_input_spec) assert getattr(funky, "a") == 3.5 @@ -590,7 +586,7 @@ def test_input_spec_func_1a_except(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo( @@ -599,7 +595,7 @@ def testfunc(a): bases=(FunctionDef,), ) with pytest.raises(TypeError): - testfunc(a=3.5, input_spec=my_input_spec) + TestFunc(a=3.5, input_spec=my_input_spec) def test_input_spec_func_1b_except(): @@ -608,7 +604,7 @@ def test_input_spec_func_1b_except(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo( @@ -622,7 +618,7 @@ def testfunc(a): bases=(FunctionDef,), ) with pytest.raises(AttributeError, match="only these keys are supported"): - testfunc(a=3.5, input_spec=my_input_spec) + TestFunc(a=3.5, input_spec=my_input_spec) def test_input_spec_func_1d_except(): @@ -631,11 +627,11 @@ def test_input_spec_func_1d_except(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo(name="Input", fields=[], bases=(FunctionDef,)) - funky = testfunc(a=3.5, input_spec=my_input_spec) + funky = TestFunc(a=3.5, input_spec=my_input_spec) with pytest.raises(TypeError, match="missing 1 required positional argument"): funky() @@ -646,7 +642,7 @@ def test_input_spec_func_2(): """ @python.define - def testfunc(a: int): + def TestFunc(a: int): return a my_input_spec = SpecInfo( @@ -655,7 +651,7 @@ def testfunc(a: int): bases=(FunctionDef,), ) - funky = testfunc(a=3.5, input_spec=my_input_spec) + funky = TestFunc(a=3.5, input_spec=my_input_spec) assert getattr(funky, "a") == 3.5 @@ -666,7 +662,7 @@ def test_input_spec_func_2a(): """ @python.define - def testfunc(a: int): + def TestFunc(a: int): return a my_input_spec = SpecInfo( @@ -675,7 +671,7 @@ def testfunc(a: int): bases=(FunctionDef,), ) - funky = testfunc(a=3.5, input_spec=my_input_spec) + funky = TestFunc(a=3.5, input_spec=my_input_spec) assert getattr(funky, "a") == 3.5 @@ -685,7 +681,7 @@ def test_input_spec_func_3(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo( @@ -702,7 +698,7 @@ def testfunc(a): bases=(FunctionDef,), ) - funky = testfunc(a=2, input_spec=my_input_spec) + funky = TestFunc(a=2, input_spec=my_input_spec) assert getattr(funky, "a") == 2 @@ -712,7 +708,7 @@ def test_input_spec_func_3a_except(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_input_spec = SpecInfo( @@ -730,7 +726,7 @@ def 
testfunc(a): ) with pytest.raises(ValueError, match="value of a has to be"): - testfunc(a=3, input_spec=my_input_spec) + TestFunc(a=3, input_spec=my_input_spec) def test_input_spec_func_4(): @@ -739,7 +735,7 @@ def test_input_spec_func_4(): """ @python.define - def testfunc(a, b=1): + def TestFunc(a, b=1): return a + b my_input_spec = SpecInfo( @@ -757,7 +753,7 @@ def testfunc(a, b=1): bases=(FunctionDef,), ) - funky = testfunc(a=2, input_spec=my_input_spec) + funky = TestFunc(a=2, input_spec=my_input_spec) with pytest.raises(Exception, match="b is mandatory"): funky() @@ -768,7 +764,7 @@ def test_input_spec_func_4a(): """ @python.define - def testfunc(a, b=1): + def TestFunc(a, b=1): return a + b my_input_spec = SpecInfo( @@ -783,9 +779,9 @@ def testfunc(a, b=1): bases=(FunctionDef,), ) - funky = testfunc(a=2, input_spec=my_input_spec) - res = funky() - assert res.output.out == 12 + funky = TestFunc(a=2, input_spec=my_input_spec) + outputs = funky() + assert outputs.out == 12 def test_input_spec_func_5(): @@ -794,7 +790,7 @@ def test_input_spec_func_5(): """ @python.define - def testfunc(a): + def TestFunc(a): return len(a) my_input_spec = SpecInfo( @@ -803,17 +799,17 @@ def testfunc(a): bases=(FunctionDef,), ) - funky = testfunc(a=3.5, input_spec=my_input_spec) + funky = TestFunc(a=3.5, input_spec=my_input_spec) assert getattr(funky, "a") == MultiInputObj([3.5]) - res = funky() - assert res.output.out == 1 + outputs = funky() + assert outputs.out == 1 def test_output_spec_func_1(): """the function w/o annotated, but output_spec is used""" @python.define - def testfunc(a): + def TestFunc(a): return a my_output_spec = SpecInfo( @@ -822,9 +818,9 @@ def testfunc(a): bases=(BaseDef,), ) - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5, output_spec=my_output_spec) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_1a_except(): @@ -833,7 +829,7 @@ def test_output_spec_func_1a_except(): """ @python.define - def testfunc(a): + def TestFunc(a): return a my_output_spec = SpecInfo( @@ -842,7 +838,7 @@ def testfunc(a): bases=(BaseDef,), ) - funky = testfunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5, output_spec=my_output_spec) with pytest.raises(TypeError): funky() @@ -853,7 +849,7 @@ def test_output_spec_func_2(): """ @python.define - def testfunc(a) -> int: + def TestFunc(a) -> int: return a my_output_spec = SpecInfo( @@ -862,9 +858,9 @@ def testfunc(a) -> int: bases=(BaseDef,), ) - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5, output_spec=my_output_spec) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_2a(): @@ -874,7 +870,7 @@ def test_output_spec_func_2a(): """ @python.define - def testfunc(a) -> int: + def TestFunc(a) -> int: return a my_output_spec = SpecInfo( @@ -883,9 +879,9 @@ def testfunc(a) -> int: bases=(BaseDef,), ) - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out1 == 3.5 + funky = TestFunc(a=3.5, output_spec=my_output_spec) + outputs = funky() + assert outputs.out1 == 3.5 def test_output_spec_func_3(): @@ -894,7 +890,7 @@ def test_output_spec_func_3(): """ @python.define - def testfunc(a, b): + def TestFunc(a, b): return [a, b] my_output_spec = SpecInfo( @@ -908,9 +904,9 @@ def testfunc(a, b): bases=(BaseDef,), ) - funky = testfunc(a=3.5, b=1, output_spec=my_output_spec) - res = funky() - assert res.output.out_list == 
[3.5, 1] + funky = TestFunc(a=3.5, b=1, output_spec=my_output_spec) + outputs = funky() + assert outputs.out_list == [3.5, 1] def test_output_spec_func_4(): @@ -919,7 +915,7 @@ def test_output_spec_func_4(): """ @python.define - def testfunc(a): + def TestFunc(a): return [a] my_output_spec = SpecInfo( @@ -933,9 +929,9 @@ def testfunc(a): bases=(BaseDef,), ) - funky = testfunc(a=3.5, output_spec=my_output_spec) - res = funky() - assert res.output.out_1el == 3.5 + funky = TestFunc(a=3.5, output_spec=my_output_spec) + outputs = funky() + assert outputs.out_1el == 3.5 def test_exception_func(): @@ -955,8 +951,8 @@ def fun_none(x): return None task = fun_none(name="none", x=3) - res = task() - assert res.output.out is None + outputs = task() + assert outputs.out is None def test_result_none_2(): @@ -967,25 +963,25 @@ def fun_none(x) -> (ty.Any, ty.Any): return None task = fun_none(name="none", x=3) - res = task() - assert res.output.out1 is None - assert res.output.out2 is None + outputs = task() + assert outputs.out1 is None + assert outputs.out2 is None def test_audit_prov( tmpdir, ): @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) + funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) funky.cache_dir = tmpdir funky() # saving the audit message into the file - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) + funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) funky.cache_dir = tmpdir funky() # this should be the default loctaion @@ -998,12 +994,12 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_task(tmpdir): @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b from glob import glob - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) + funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) funky.cache_dir = tmpdir funky() message_path = tmpdir / funky.checksum / "messages" @@ -1013,7 +1009,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] data = json.load(f) if "@type" in data: if "AssociatedWith" in data: - assert "testfunc" in data["Label"] + assert "TestFunc" in data["Label"] if "@type" in data: if data["@type"] == "input": @@ -1178,16 +1174,16 @@ def test_audit_prov_messdir_1( """customized messenger dir""" @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) + funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) funky.cache_dir = tmpdir funky() # saving the audit message into the file - funky = testfunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) + funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) # user defined path message_path = tmpdir / funky.checksum / "my_messages" funky.cache_dir = tmpdir @@ -1206,18 +1202,18 @@ def test_audit_prov_messdir_2( 
"""customized messenger dir in init""" @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b # printing the audit message - funky = testfunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) + funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) funky.cache_dir = tmpdir funky() # user defined path (doesn't depend on checksum, can be defined before init) message_path = tmpdir / "my_messages" # saving the audit message into the file - funky = testfunc( + funky = TestFunc( a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), @@ -1238,7 +1234,7 @@ def test_audit_prov_wf( """FileMessenger for wf""" @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b wf = Workflow( @@ -1248,8 +1244,8 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - wf.add(testfunc(name="testfunc", a=wf.lzin.x)) - wf.set_output([("out", wf.testfunc.lzout.out)]) + wf.add(TestFunc(name="TestFunc", a=wf.lzin.x)) + wf.set_output([("out", wf.TestFunc.lzout.out)]) wf.x = 2 wf(plugin="cf") @@ -1265,10 +1261,10 @@ def test_audit_all( tmpdir, ): @python.define - def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b - funky = testfunc(a=2, audit_flags=AuditFlag.ALL, messengers=FileMessenger()) + funky = TestFunc(a=2, audit_flags=AuditFlag.ALL, messengers=FileMessenger()) message_path = tmpdir / funky.checksum / "messages" funky.cache_dir = tmpdir funky.audit.messenger_args = dict(message_dir=message_path) @@ -1290,35 +1286,35 @@ def test_shell_cmd(tmpdir): # all args given as executable shelly = ShellTask(name="shelly", executable=cmd) assert shelly.cmdline == " ".join(cmd) - res = shelly._run() - assert res.output.stdout == " ".join(cmd[1:]) + "\n" + outputs = shelly() + assert outputs.stdout == " ".join(cmd[1:]) + "\n" # separate command into exec + args shelly = ShellTask(executable=cmd[0], args=cmd[1:]) assert shelly.definition.executable == "echo" assert shelly.cmdline == " ".join(cmd) - res = shelly._run() - assert res.output.return_code == 0 - assert res.output.stdout == " ".join(cmd[1:]) + "\n" + outputs = shelly() + assert outputs.return_code == 0 + assert outputs.stdout == " ".join(cmd[1:]) + "\n" def test_functask_callable(tmpdir): # no submitter or plugin foo = FunAddTwo(a=1) - res = foo() - assert res.output.out == 3 + outputs = foo() + assert outputs.out == 3 assert foo.plugin is None # plugin bar = FunAddTwo(a=2) - res = bar(plugin="cf") - assert res.output.out == 4 + outputs = bar(plugin="cf") + assert outputs.out == 4 assert bar.plugin is None foo2 = FunAddTwo(a=3) foo2.plugin = "cf" - res = foo2() - assert res.output.out == 5 + outputs = foo2() + assert outputs.out == 5 assert foo2.plugin == "cf" @@ -1554,10 +1550,10 @@ def test_object_input(): """Test function tasks with object inputs""" @python.define - def testfunc(a: A): + def TestFunc(a: A): return a.x - outputs = testfunc(a=A(x=7))() + outputs = TestFunc(a=A(x=7))() assert outputs.out == 7 From 6a7265718d235978f5725e69d85df81c5b4f711f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Feb 2025 07:54:10 +1100 
Subject: [PATCH 190/342] added definition to Result attributes that need to be cloudpickled --- new-docs/source/tutorial/3-troubleshooting.ipynb | 5 +++-- pydra/engine/specs.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 0edd4b8411..8020d3a3a7 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -128,8 +128,9 @@ "# This workflow will fail because we are trying to divide by 0\n", "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", - "with Submitter(worker=\"cf\") as sub:\n", - " result = sub(wf)\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", " \n", "if result.errored:\n", " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 9e86a9abc1..1f6253760c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -531,15 +531,19 @@ class Result(ty.Generic[OutputsType]): errored: bool = False definition: TaskDef[OutputsType] | None = None + CLOUD_PICKLE_ATTRS = ("outputs", "definition") + def __getstate__(self): state = attrs_values(self) - if state["outputs"] is not None: - state["outputs"] = cp.dumps(state["outputs"]) + for attr in self.CLOUD_PICKLE_ATTRS: + if state[attr] is not None: + state[attr] = cp.dumps(state[attr]) return state def __setstate__(self, state): - if state["outputs"] is not None: - state["outputs"] = cp.loads(state["outputs"]) + for attr in self.CLOUD_PICKLE_ATTRS: + if state[attr] is not None: + state[attr] = cp.loads(state[attr]) for name, val in state.items(): setattr(self, name, val) From 6ed002f7938c97cfb281848f41204441ce30c587 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Feb 2025 10:22:07 +1100 Subject: [PATCH 191/342] added module bytes_repr serializer --- pydra/design/base.py | 42 ++++++++++++++++--------- pydra/design/python.py | 2 +- pydra/engine/specs.py | 64 ++++++++++++++++++++++++++++++++++----- pydra/engine/submitter.py | 5 ++- pydra/utils/hash.py | 25 +++++++++------ 5 files changed, 104 insertions(+), 34 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 8a8b8889de..3044a7b252 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -23,6 +23,7 @@ MultiOutputObj, MultiOutputFile, ) +from pydra.utils.hash import hash_function if ty.TYPE_CHECKING: @@ -179,6 +180,8 @@ class Field: The converter for the field passed through to the attrs.field, by default it is None validator: callable | iterable[callable], optional The validator(s) for the field passed through to the attrs.field, by default it is None + hash_eq: bool, optional + Whether to use the hash of the value for equality comparison, by default it is False """ name: str | None = None @@ -192,8 +195,9 @@ class Field: requires: list[RequirementSet] = attrs.field( factory=list, converter=requires_converter ) - converter: ty.Callable | None = None - validator: ty.Callable | None = None + converter: ty.Callable[..., ty.Any] | None = None + validator: ty.Callable[..., bool] | None = None + hash_eq: bool = False def requirements_satisfied(self, inputs: "TaskDef") -> bool: """Check if all the requirements are satisfied by the inputs""" @@ -408,6 +412,7 @@ def make_task_def( klass : type The class created using the attrs package """ + spec_type._check_arg_refs(inputs, outputs) for inpt in 
inputs.values(): @@ -448,7 +453,7 @@ def make_task_def( # Now that we have saved the attributes in lists to be for arg in inputs.values(): # If an outarg input then the field type should be Path not a FileSet - default_kwargs = _get_default(arg) + attrs_kwargs = _get_attrs_kwargs(arg) if isinstance(arg, Out) and is_fileset_or_union(arg.type): if getattr(arg, "path_template", False): if is_optional(arg.type): @@ -456,7 +461,7 @@ def make_task_def( # Will default to None and not be inserted into the command else: field_type = Path | bool - default_kwargs = {"default": True} + attrs_kwargs = {"default": True} elif is_optional(arg.type): field_type = Path | None else: @@ -471,14 +476,14 @@ def make_task_def( validator=make_validator(arg, klass.__name__), metadata={PYDRA_ATTR_METADATA: arg}, on_setattr=attrs.setters.convert, - **default_kwargs, + **attrs_kwargs, ), ) klass.__annotations__[arg.name] = field_type # Create class using attrs package, will create attributes for all columns and # parameters - attrs_klass = attrs.define(auto_attribs=False, kw_only=True)(klass) + attrs_klass = attrs.define(auto_attribs=False, kw_only=True, eq=False)(klass) return attrs_klass @@ -541,13 +546,15 @@ def make_outputs_spec( n: attrs.field( converter=make_converter(o, f"{spec_name}.Outputs"), metadata={PYDRA_ATTR_METADATA: o}, - **_get_default(o), + **_get_attrs_kwargs(o), ) for n, o in outputs.items() }, ) outputs_klass.__annotations__.update((o.name, o.type) for o in outputs.values()) - outputs_klass = attrs.define(auto_attribs=False, kw_only=True)(outputs_klass) + outputs_klass = attrs.define(auto_attribs=False, kw_only=True, eq=False)( + outputs_klass + ) return outputs_klass @@ -972,14 +979,19 @@ def check_explicit_fields_are_none(klass, inputs, outputs): ) -def _get_default(field: Field) -> dict[str, ty.Any]: +def _get_attrs_kwargs(field: Field) -> dict[str, ty.Any]: + kwargs = {} if not hasattr(field, "default"): - return {"factory": nothing_factory} - if field.default is not EMPTY: - return {"default": field.default} - if is_optional(field.type): - return {"default": None} - return {"factory": nothing_factory} + kwargs["factory"] = nothing_factory + elif field.default is not EMPTY: + kwargs["default"] = field.default + elif is_optional(field.type): + kwargs["default"] = None + else: + kwargs["factory"] = nothing_factory + if field.hash_eq: + kwargs["eq"] = hash_function + return kwargs def nothing_factory(): diff --git a/pydra/design/python.py b/pydra/design/python.py index db2d5dd604..f41a4e0106 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -158,7 +158,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: ) parsed_inputs["function"] = arg( - name="function", type=ty.Callable, default=function + name="function", type=ty.Callable, default=function, hash_eq=True ) defn = make_task_def( diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 1f6253760c..95f5a95f1e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -54,7 +54,7 @@ def is_set(value: ty.Any) -> bool: return value not in (attrs.NOTHING, EMPTY) -@attrs.define +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class TaskOutputs: """Base class for all output definitions""" @@ -113,11 +113,31 @@ def __getitem__(self, name_or_index: str | int) -> ty.Any: f"{self} doesn't have an attribute {name_or_index}" ) from None + def __eq__(self, other: ty.Any) -> bool: + """Check if two task definitions are equal""" + values = attrs.asdict(self) + fields = list_fields(self) + try: + other_values = 
attrs.asdict(other) + except AttributeError: + return False + try: + other_fields = list_fields(other) + except AttributeError: + return False + if fields != other_fields: + return False + for field in list_fields(self): + if field.hash_eq: + values[field.name] = hash_function(values[field.name]) + other_values[field.name] = hash_function(other_values[field.name]) + return values == other_values + OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" @@ -341,6 +361,34 @@ def __iter__(self) -> ty.Generator[str, None, None]: if not (f.name.startswith("_") or f.name in self.RESERVED_FIELD_NAMES) ) + def __eq__(self, other: ty.Any) -> bool: + """Check if two task definitions are equal""" + values = attrs.asdict(self) + try: + other_values = attrs.asdict(other) + except AttributeError: + return False + if set(values) != set(other_values): + return False # Return if attribute keys don't match + for field in list_fields(self): + if field.hash_eq: + values[field.name] = hash_function(values[field.name]) + other_values[field.name] = hash_function(other_values[field.name]) + if values != other_values: + return False + hash_cache = Cache() + if hash_function(type(self), cache=hash_cache) != hash_function( + type(other), cache=hash_cache + ): + return False + try: + other_outputs = other.Outputs + except AttributeError: + return False + return hash_function(self.Outputs, cache=hash_cache) == hash_function( + other_outputs, cache=hash_cache + ) + def __getitem__(self, name: str) -> ty.Any: """Return the value for the given attribute, resolving any templates @@ -595,7 +643,7 @@ class RuntimeSpec: network: bool = False -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class PythonOutputs(TaskOutputs): @classmethod @@ -624,7 +672,7 @@ def _from_task(cls, task: "Task[PythonDef]") -> Self: PythonOutputsType = ty.TypeVar("OutputType", bound=PythonOutputs) -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class PythonDef(TaskDef[PythonOutputsType]): _task_type = "python" @@ -653,7 +701,7 @@ def _run(self, task: "Task[PythonDef]") -> None: ) -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class WorkflowOutputs(TaskOutputs): @classmethod @@ -707,7 +755,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: WorkflowOutputsType = ty.TypeVar("OutputType", bound=WorkflowOutputs) -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class WorkflowDef(TaskDef[WorkflowOutputsType]): _task_type = "workflow" @@ -738,7 +786,7 @@ def construct(self) -> "Workflow": STDERR_HELP = """The standard error stream produced by the command.""" -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class ShellOutputs(TaskOutputs): """Output definition of a generic shell process.""" @@ -899,7 +947,7 @@ def _resolve_value( ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) -@attrs.define(kw_only=True, auto_attribs=False) +@attrs.define(kw_only=True, auto_attribs=False, eq=False) class ShellDef(TaskDef[ShellOutputsType]): _task_type = "shell" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 980c937102..a8ede93a2b 100644 --- 
a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -87,7 +87,7 @@ class Submitter: def __init__( self, cache_dir: os.PathLike | None = None, - worker: str | ty.Type[Worker] | Worker = "debug", + worker: str | ty.Type[Worker] | Worker | None = "debug", environment: "Environment | None" = None, rerun: bool = False, cache_locations: list[os.PathLike] | None = None, @@ -98,6 +98,9 @@ def __init__( **kwargs, ): + if worker is None: + worker = "debug" + from . import check_latest_version if Task._etelemetry_version_data is None: diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index a7386e1791..a16b84f63f 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -24,6 +24,7 @@ from filelock import SoftFileLock import attrs.exceptions from fileformats.core.fileset import FileSet, MockMixin +from fileformats.generic import FsObject import fileformats.core.exceptions from . import user_cache_dir, add_exc_note from .misc import in_stdlib @@ -332,17 +333,16 @@ def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: elif hasattr(obj, "__slots__") and obj.__slots__ is not None: dct = {attr: getattr(obj, attr) for attr in obj.__slots__} else: + + def is_special_or_method(n: str): + return (n.startswith("__") and n.endswith("__")) or inspect.ismethod( + getattr(obj, n) + ) + try: - dct = obj.__dict__ + dct = {n: v for n, v in obj.__dict__.items() if not is_special_or_method(n)} except AttributeError: - dct = { - n: getattr(obj, n) - for n in dir(obj) - if not ( - (n.startswith("__") and n.endswith("__")) - or inspect.ismethod(getattr(obj, n)) - ) - } + dct = {n: getattr(obj, n) for n in dir(obj) if not is_special_or_method(n)} yield from bytes_repr_mapping_contents(dct, cache) yield b"}" @@ -456,6 +456,13 @@ def bytes_repr_dict(obj: dict, cache: Cache) -> Iterator[bytes]: yield b"}" +@register_serializer +def bytes_repr_module(obj: types.ModuleType, cache: Cache) -> Iterator[bytes]: + yield b"module:(" + yield hash_single(FsObject(obj.__file__), cache=cache) + yield b")" + + @register_serializer(ty._GenericAlias) @register_serializer(ty._SpecialForm) @register_serializer(type) From 151c32586dab3f3f70003209b0d104bae85def24 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Feb 2025 11:04:53 +1100 Subject: [PATCH 192/342] debugged up python, workflow design unittests --- pydra/design/tests/test_python.py | 14 ++++++++++---- pydra/design/tests/test_workflow.py | 22 +++++++++++++++++----- pydra/design/workflow.py | 2 +- pydra/engine/specs.py | 4 ++-- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index dce89dbf04..e698c79495 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -23,7 +23,7 @@ def func(a: int) -> float: outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), - python.arg(name="function", type=ty.Callable, default=func), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), ] assert outputs == [python.out(name="out", type=float)] definition = SampleDef(a=1) @@ -45,7 +45,7 @@ def func(a: int, k: float = 2.0) -> float: outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int), - python.arg(name="function", type=ty.Callable, default=func), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), python.arg(name="k", type=float, default=2.0), ] assert outputs == [python.out(name="out", 
type=float)] @@ -69,7 +69,7 @@ def func(a: int) -> float: outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=int, help="The argument to be doubled"), - python.arg(name="function", type=ty.Callable, default=func), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), ] assert outputs == [ python.out(name="b", type=Decimal, help="the doubled output"), @@ -94,7 +94,7 @@ def func(a: int) -> int: outputs = sorted(list_fields(SampleDef.Outputs), key=sort_key) assert inputs == [ python.arg(name="a", type=float), - python.arg(name="function", type=ty.Callable, default=func), + python.arg(name="function", type=ty.Callable, hash_eq=True, default=func), ] assert outputs == [python.out(name="b", type=float)] intf = SampleDef(a=1) @@ -118,6 +118,7 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] @@ -149,6 +150,7 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] @@ -183,6 +185,7 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] @@ -225,6 +228,7 @@ def SampleDef(a: int, b: float) -> tuple[float, float]: python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] @@ -272,6 +276,7 @@ def function(a, b): python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] @@ -342,6 +347,7 @@ def function(a, b): python.arg( name="function", type=ty.Callable, + hash_eq=True, default=attrs.fields(SampleDef).function.default, ), ] diff --git a/pydra/design/tests/test_workflow.py b/pydra/design/tests/test_workflow.py index 4e49f0e7dd..090182bc42 100644 --- a/pydra/design/tests/test_workflow.py +++ b/pydra/design/tests/test_workflow.py @@ -57,7 +57,9 @@ def MyTestWorkflow(a, b): assert list_fields(MyTestWorkflow) == [ workflow.arg(name="a"), workflow.arg(name="b"), - workflow.arg(name="constructor", type=ty.Callable, default=constructor), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), ] assert list_fields(MyTestWorkflow.Outputs) == [ workflow.out(name="out"), @@ -108,7 +110,9 @@ def MyTestShellWorkflow( workflow.arg(name="input_video", type=video.Mp4), workflow.arg(name="watermark", type=image.Png), workflow.arg(name="watermark_dims", type=tuple[int, int], default=(10, 10)), - workflow.arg(name="constructor", type=ty.Callable, default=constructor), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), ] assert list_fields(MyTestShellWorkflow.Outputs) == [ workflow.out(name="output_video", type=video.Mp4), @@ -161,7 +165,9 @@ class Outputs(WorkflowOutputs): assert sorted(list_fields(MyTestWorkflow), key=attrgetter("name")) == [ workflow.arg(name="a", type=int), workflow.arg(name="b", type=float, help="A float input", converter=a_converter), - workflow.arg(name="constructor", type=ty.Callable, default=constructor), + workflow.arg( + name="constructor", type=ty.Callable, hash_eq=True, default=constructor + ), ] assert list_fields(MyTestWorkflow.Outputs) == [ workflow.out(name="out", type=float), @@ -290,7 +296,10 @@ def MyTestWorkflow(a: int, b: float) -> tuple[float, 
float]: workflow.arg(name="a", type=int, help="An integer input"), workflow.arg(name="b", type=float, help="A float input"), workflow.arg( - name="constructor", type=ty.Callable, default=MyTestWorkflow().constructor + name="constructor", + type=ty.Callable, + hash_eq=True, + default=MyTestWorkflow().constructor, ), ] assert list_fields(MyTestWorkflow.Outputs) == [ @@ -330,7 +339,10 @@ def MyTestWorkflow(a: int, b: float): workflow.arg(name="a", type=int), workflow.arg(name="b", type=float), workflow.arg( - name="constructor", type=ty.Callable, default=MyTestWorkflow().constructor + name="constructor", + type=ty.Callable, + hash_eq=True, + default=MyTestWorkflow().constructor, ), ] assert list_fields(MyTestWorkflow.Outputs) == [ diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 7043da1ca3..68dfcc37d9 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -165,7 +165,7 @@ def make(wrapped: ty.Callable | type) -> TaskDef: ) parsed_inputs["constructor"] = arg( - name="constructor", type=ty.Callable, default=constructor + name="constructor", type=ty.Callable, hash_eq=True, default=constructor ) for inpt_name in lazy: parsed_inputs[inpt_name].lazy = True diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 95f5a95f1e..c1b97ea069 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -363,9 +363,9 @@ def __iter__(self) -> ty.Generator[str, None, None]: def __eq__(self, other: ty.Any) -> bool: """Check if two task definitions are equal""" - values = attrs.asdict(self) + values = attrs.asdict(self, recurse=False) try: - other_values = attrs.asdict(other) + other_values = attrs.asdict(other, recurse=False) except AttributeError: return False if set(values) != set(other_values): From b8a0f1d34cb5c70b124e3ffc4372b2ed6d72fe76 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 10:40:21 +1100 Subject: [PATCH 193/342] debugging test_tasks --- .../tutorial/2-advanced-execution.ipynb | 9 + new-docs/source/tutorial/6-workflow.ipynb | 6 +- pydra/design/base.py | 12 +- pydra/engine/audit.py | 16 +- pydra/engine/core.py | 4 +- pydra/engine/specs.py | 9 +- pydra/engine/submitter.py | 8 +- pydra/engine/task.py | 2 +- pydra/engine/tests/test_dockertask.py | 56 +- pydra/engine/tests/test_environments.py | 65 +- pydra/engine/tests/test_shelltask.py | 230 +++--- .../engine/tests/test_shelltask_inputspec.py | 134 ++-- pydra/engine/tests/test_singularity.py | 38 +- pydra/engine/tests/test_task.py | 738 ++++++++---------- pydra/engine/tests/utils.py | 12 +- pydra/engine/workers.py | 5 +- pydra/utils/tests/utils.py | 4 +- 17 files changed, 642 insertions(+), 706 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index 3fe3dbd2c7..f7cacb4b7a 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -393,6 +393,15 @@ "Work in progress..." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hooks\n", + "\n", + "Work in progress..." 
+ ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index 7c2dd20637..6648f8b3e0 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -133,7 +133,7 @@ "def SplitWorkflow(a: list[int], b: list[float]) -> list[float]:\n", " # Multiply over all combinations of the elements of a and b, then combine the results\n", " # for each a element into a list over each b element\n", - " mul = workflow.add(Mul()).split(x=a, y=b).combine(\"x\")\n", + " mul = workflow.add(Mul().split(x=a, y=b).combine(\"x\"))\n", " # Sume the multiplications across all all b elements for each a element\n", " sum = workflow.add(Sum(x=mul.out))\n", " return sum.out" @@ -154,8 +154,8 @@ "source": [ "@workflow.define\n", "def SplitThenCombineWorkflow(a: list[int], b: list[float], c: float) -> list[float]:\n", - " mul = workflow.add(Mul()).split(x=a, y=b)\n", - " add = workflow.add(Add(x=mul.out, y=c)).combine(\"Mul.x\")\n", + " mul = workflow.add(Mul().split(x=a, y=b))\n", + " add = workflow.add(Add(x=mul.out, y=c).combine(\"Mul.x\"))\n", " sum = workflow.add(Sum(x=add.out))\n", " return sum.out" ] diff --git a/pydra/design/base.py b/pydra/design/base.py index 3044a7b252..a8893ab039 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -830,10 +830,16 @@ def extract_function_inputs_and_outputs( inputs = input_types for inpt_name, default in input_defaults.items(): inpt = inputs[inpt_name] - if isinstance(inpt, arg_type) and inpt.default is EMPTY: - inpt.default = default - else: + if isinstance(inpt, arg_type): + if inpt.default is EMPTY: + inpt.default = default + elif inspect.isclass(inpt): inputs[inpt_name] = arg_type(type=inpt, default=default) + else: + raise ValueError( + f"Unrecognised input type ({inpt}) for input {inpt_name} with default " + f"value {default}" + ) return_type = type_hints.get("return", ty.Any) if outputs and len(outputs) > 1: if return_type is not ty.Any: diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 6f39fda1d2..55b357b782 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -1,6 +1,7 @@ """Module to keep track of provenance information.""" import os +import typing as ty import json from pydra.utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag from pydra.engine.helpers import attrs_values @@ -12,6 +13,9 @@ except ImportError: import importlib.resources as importlib_resources # type: ignore +if ty.TYPE_CHECKING: + from pydra.engine.task import Task + class Audit: """Handle provenance tracking and resource utilization.""" @@ -178,17 +182,19 @@ def audit_check(self, flag): """ return self.audit_flags & flag - def audit_task(self, task): + def audit_task(self, task: "Task"): import subprocess as sp - from .helpers import attrs_fields + from .helpers import list_fields label = task.name - command = task.cmdline if hasattr(task.inputs, "executable") else None - attr_list = attrs_fields(task.inputs) + command = ( + task.definition.cmdline if hasattr(task.definition, "executable") else None + ) + attr_list = list_fields(task.definition) for attrs in attr_list: input_name = attrs.name - value = getattr(task.inputs, input_name) + value = task.inputs[input_name] if isinstance(value, FileSet): input_path = os.path.abspath(value) file_hash = hash_function(value) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 331b54d837..644beb6ab4 100644 --- a/pydra/engine/core.py +++ 
b/pydra/engine/core.py @@ -32,7 +32,6 @@ TaskHook, ) from .helpers import ( - create_checksum, attrs_fields, attrs_values, load_result, @@ -203,8 +202,7 @@ def checksum(self): """ if self._checksum is not None: return self._checksum - input_hash = self.definition._hash - self._checksum = create_checksum(self.definition._task_type, input_hash) + self._checksum = self.definition._checksum return self._checksum @property diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index c1b97ea069..739a58b02a 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -159,6 +159,7 @@ def __call__( audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, + name: str | None = None, **kwargs: ty.Any, ) -> OutputsType: """Create a task from this definition and execute it to produce a result. @@ -183,6 +184,8 @@ def __call__( Messengers, by default None messenger_args : dict, optional Messenger arguments, by default None + name : str + The name of the task, by default None **kwargs : dict Keyword arguments to pass on to the worker initialisation @@ -209,7 +212,7 @@ def __call__( worker=worker, **kwargs, ) as sub: - result = sub(self) + result = sub(self, name=name) except TypeError as e: if hasattr(e, "__notes__") and WORKER_KWARG_FAIL_NOTE in e.__notes__: if match := re.match( @@ -412,6 +415,10 @@ def _hash(self): hsh, self._hashes = self._compute_hashes() return hsh + @property + def _checksum(self): + return f"{self._task_type}-{self._hash}" + def _hash_changes(self): """Detects any changes in the hashed values between the current inputs and the previously calculated values""" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index a8ede93a2b..b1bb46c49d 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -168,9 +168,13 @@ def worker(self): def __call__( self, task_def: "TaskDef", + name: str | None = "task", ): """Submitter run function.""" + if name is None: + name = "task" + task_def._check_rules() # If the outer task is split, create an implicit workflow to hold the split nodes if task_def._splitter: @@ -190,10 +194,10 @@ def Split(defn: TaskDef, output_types: dict): f"Task {self} is marked for combining, but not splitting. " "Use the `split` method to split the task before combining." 
) - task = Task(task_def, submitter=self, name="task", environment=self.environment) + task = Task(task_def, submitter=self, name=name, environment=self.environment) try: self.run_start_time = datetime.now() - if task.is_async: # Only workflow tasks can be async + if self.worker.is_async: # Only workflow tasks can be async self.loop.run_until_complete( self.worker.run_async(task, rerun=self.rerun) ) diff --git a/pydra/engine/task.py b/pydra/engine/task.py index f6679747d9..c60ff23b15 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -149,7 +149,7 @@ def __init__( self.inputs_mod_root = {} -class BoshTask(ShellTask): +class BoshTask(ShellDef): def _command_args_single(self, state_ind=None, index=None): """Get command line arguments for a single state""" diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index a80089f5a5..7d54f26805 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,9 +1,10 @@ import pytest -from ..task import ShellTask -from ..submitter import Submitter +from pydra.engine.specs import ShellDef +from pydra.engine.submitter import Submitter from fileformats.generic import File -from ..environments import Docker +from pydra.engine.environments import Docker from pydra.design import shell, workflow +from pydra.engine.core import Task from .utils import no_win, need_docker, result_submitter, result_no_submitter @@ -14,13 +15,19 @@ def test_docker_1_nosubm(): no submitter """ cmd = "whoami" - docky = shell.define(cmd)(environment=Docker(image="busybox")) - assert docky.environment.image == "busybox" - assert docky.environment.tag == "latest" - assert isinstance(docky.environment, Docker) - assert docky.cmdline == cmd + Docky = shell.define(cmd) + docky = Docky() + docky_task = Task( + definition=docky, + name="docky", + submitter=Submitter(environment=Docker(image="busybox")), + ) + assert docky_task.environment.image == "busybox" + assert docky_task.environment.tag == "latest" + assert isinstance(docky_task.environment, Docker) + assert docky_task.cmdline == cmd - res = docky() + res = docky_task() assert res.output.stdout == "root\n" assert res.output.return_code == 0 @@ -32,14 +39,14 @@ def test_docker_1(plugin): using submitter """ cmd = "whoami" - docky = shell.define(cmd)(environment=Docker(image="busybox")) + Docky = shell.define(cmd) + docky = Docky() - with Submitter(worker=plugin) as sub: - docky(submitter=sub) + with Submitter(environment=Docker(image="busybox")) as sub: + res = sub(docky) - res = docky.result() - assert res.output.stdout == "root\n" - assert res.output.return_code == 0 + assert res.outputs.stdout == "root\n" + assert res.outputs.return_code == 0 @no_win @@ -50,7 +57,8 @@ def test_docker_2(results_function, plugin): with and without submitter """ cmdline = "echo hail pydra" - docky = shell.define(cmdline)(environment=Docker(image="busybox")) + Docky = shell.define(cmdline) + docky = Docky() # cmdline doesn't know anything about docker assert docky.cmdline == cmdline res = results_function(docky, plugin) @@ -68,13 +76,9 @@ def test_docker_2a(results_function, plugin): cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - docky = ShellTask( - name="docky", - executable=cmd_exec, - args=cmd_args, - environment=Docker(image="busybox"), - ) - assert docky.definition.executable == "echo" + Docky = shell.define(" ".join([cmd_exec] + cmd_args)) + docky = Docky() + assert docky.executable == "echo" assert docky.cmdline == 
f"{cmd_exec} {' '.join(cmd_args)}" res = results_function(docky, plugin) @@ -93,9 +97,9 @@ def test_docker_st_1(results_function, plugin): splitter = executable """ cmd = ["pwd", "whoami"] - docky = ShellTask(name="docky", environment=Docker(image="busybox")).split( - "executable", executable=cmd - ) + Docky = shell.define("placeholder") + docky = Docky().split(executable=cmd) + assert docky.state.splitter == "docky.executable" res = results_function(docky, plugin) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 6114bf6c91..9555b04e64 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,10 +1,12 @@ from pathlib import Path from ..environments import Native, Docker, Singularity -from ..task import ShellTask +from ..task import ShellDef from ..submitter import Submitter from fileformats.generic import File from pydra.design import shell +from pydra.engine.core import Task +from pydra.engine.helpers import attrs_values from .utils import no_win, need_docker, need_singularity import pytest @@ -17,28 +19,31 @@ def makedir(path, name): def test_native_1(tmp_path): """simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) - assert shelly.cmdline == " ".join(cmd) + def newcache(x): + return makedir(tmp_path, x) - env_res = Native().execute(shelly) - shelly() - assert env_res == shelly.output_ + cmd = "whoami" + ShellDef = shell.define(cmd) - shelly_call = ShellTask( - name="shelly_call", executable=cmd, cache_dir=newcache("shelly_call") + shelly = ShellDef() + assert shelly.cmdline == cmd + shelly_task = Task( + definition=shelly, + submitter=Submitter(cache_dir=newcache("shelly-task")), + name="shelly", ) - shelly_call(environment=Native()) - assert env_res == shelly_call.output_ - shelly_subm = ShellTask( - name="shelly_subm", executable=cmd, cache_dir=newcache("shelly_subm") - ) - with Submitter(worker="cf") as sub: - shelly_subm(submitter=sub, environment=Native()) - assert env_res == shelly_subm.result().output.__dict__ + env_outputs = Native().execute(shelly_task) + outputs = shelly(cache_dir=newcache("shelly-exec")) + assert env_outputs == attrs_values(outputs) + + outputs = shelly(environment=Native()) + assert env_outputs == attrs_values(outputs) + + with Submitter(cache_dir=newcache("shelly-submitter"), environment=Native()) as sub: + result = sub(shelly) + assert env_outputs == attrs_values(result.outputs) @no_win @@ -49,11 +54,11 @@ def test_docker_1(tmp_path): cmd = ["whoami"] docker = Docker(image="busybox") - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) + shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = docker.execute(shelly) - shelly_env = ShellTask( + shelly_env = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -62,7 +67,7 @@ def test_docker_1(tmp_path): shelly_env() assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ - shelly_call = ShellTask( + shelly_call = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) shelly_call(environment=docker) @@ -85,11 +90,11 @@ def test_docker_1_subm(tmp_path, docker): cmd = ["whoami"] docker = Docker(image="busybox") - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) + shelly = ShellDef(name="shelly", 
executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = docker.execute(shelly) - shelly_env = ShellTask( + shelly_env = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -99,7 +104,7 @@ def test_docker_1_subm(tmp_path, docker): shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ - shelly_call = ShellTask( + shelly_call = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) with Submitter(worker="cf") as sub: @@ -115,11 +120,11 @@ def test_singularity_1(tmp_path): cmd = ["whoami"] sing = Singularity(image="docker://alpine") - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) + shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = sing.execute(shelly) - shelly_env = ShellTask( + shelly_env = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -128,7 +133,7 @@ def test_singularity_1(tmp_path): shelly_env() assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ - shelly_call = ShellTask( + shelly_call = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) shelly_call(environment=sing) @@ -143,11 +148,11 @@ def test_singularity_1_subm(tmp_path, plugin): cmd = ["whoami"] sing = Singularity(image="docker://alpine") - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=newcache("shelly")) + shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) assert shelly.cmdline == " ".join(cmd) env_res = sing.execute(shelly) - shelly_env = ShellTask( + shelly_env = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_env"), @@ -157,7 +162,7 @@ def test_singularity_1_subm(tmp_path, plugin): shelly_env(submitter=sub) assert env_res == shelly_env.result().output.__dict__ - shelly_call = ShellTask( + shelly_call = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") ) with Submitter(worker=plugin) as sub: diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 117489b3fd..d32d5d32ff 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -7,7 +7,7 @@ import re import stat -from ..task import ShellTask +from ..task import ShellDef from ..submitter import Submitter from ..specs import ( ShellOutputs, @@ -32,7 +32,7 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] - shelly = ShellTask(name="shelly", executable=cmd, cache_dir=tmp_path) + shelly = ShellDef(name="shelly", executable=cmd, cache_dir=tmp_path) assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin=plugin_dask_opt) @@ -47,7 +47,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): strip option to remove \n at the end os stdout """ cmd = ["pwd"] - shelly = ShellTask(name="shelly", executable=cmd, strip=True) + shelly = ShellDef(name="shelly", executable=cmd, strip=True) shelly.cache_dir = tmp_path assert shelly.cmdline == " ".join(cmd) @@ -61,7 +61,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] - shelly = ShellTask(name="shelly", executable=cmd) + shelly = ShellDef(name="shelly", executable=cmd) shelly.cache_dir = tmp_path assert shelly.cmdline 
== " ".join(cmd) @@ -77,7 +77,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) + shelly = ShellDef(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path assert shelly.definition.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) @@ -94,7 +94,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = "pydra" # separate command into exec + args - shelly = ShellTask(name="shelly", executable=cmd_exec, args=cmd_args) + shelly = ShellDef(name="shelly", executable=cmd_exec, args=cmd_args) shelly.cache_dir = tmp_path assert shelly.definition.executable == "echo" assert shelly.cmdline == "echo pydra" @@ -116,7 +116,7 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): cmd = ["pwd", "whoami"] # all args given as executable - shelly = ShellTask(name="shelly").split("executable", executable=cmd) + shelly = ShellDef(name="shelly").split("executable", executable=cmd) shelly.cache_dir = tmp_path # assert shelly.cmdline == ["pwd", "whoami"] @@ -138,7 +138,7 @@ def test_shell_cmd_4(plugin, tmp_path): cmd_exec = "echo" cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellTask(name="shelly", executable=cmd_exec).split( + shelly = ShellDef(name="shelly", executable=cmd_exec).split( splitter="args", args=cmd_args ) shelly.cache_dir = tmp_path @@ -163,7 +163,7 @@ def test_shell_cmd_5(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellTask(name="shelly", executable=cmd_exec) + ShellDef(name="shelly", executable=cmd_exec) .split(splitter="args", args=cmd_args) .combine("args") ) @@ -185,7 +185,7 @@ def test_shell_cmd_6(plugin, tmp_path): cmd_exec = ["echo", ["echo", "-n"]] cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellTask(name="shelly").split( + shelly = ShellDef(name="shelly").split( splitter=["executable", "args"], executable=cmd_exec, args=cmd_args ) shelly.cache_dir = tmp_path @@ -229,7 +229,7 @@ def test_shell_cmd_7(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellTask(name="shelly") + ShellDef(name="shelly") .split(splitter=["executable", "args"], executable=cmd_exec, args=cmd_args) .combine("args") ) @@ -255,9 +255,9 @@ def test_wf_shell_cmd_1(plugin, tmp_path): wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) wf.inputs.cmd1 = "pwd" wf.inputs.cmd2 = "ls" - wf.add(ShellTask(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) + wf.add(ShellDef(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) wf.add( - ShellTask( + ShellDef( name="shelly_ls", executable=wf.lzin.cmd2, args=wf.shelly_pwd.lzout.stdout ) ) @@ -300,7 +300,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, args=cmd_args, @@ -348,7 +348,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, args=cmd_args, @@ -389,7 +389,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, text=hello, @@ -422,7 +422,7 @@ def 
test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, text=hello, @@ -460,7 +460,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) shelly.definition.text = hello @@ -493,7 +493,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -527,7 +527,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -557,7 +557,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -581,7 +581,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -612,7 +612,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) @@ -650,7 +650,7 @@ def test_shell_cmd_inputspec_4c_exception(plugin): with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set when the field" ): - ShellTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + ShellDef(name="shelly", executable=cmd_exec, input_spec=my_input_spec) def test_shell_cmd_inputspec_4d_exception(plugin): @@ -680,7 +680,7 @@ def test_shell_cmd_inputspec_4d_exception(plugin): with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set together" ) as excinfo: - ShellTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + ShellDef(name="shelly", executable=cmd_exec, input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -720,7 +720,7 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -768,7 +768,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -816,7 +816,7 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -861,7 +861,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, opt_t=cmd_t, input_spec=my_input_spec ) with pytest.raises(Exception) as excinfo: @@ -904,7 +904,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = 
ShellDef( name="shelly", executable=cmd_exec, opt_t=cmd_t, @@ -944,7 +944,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, args=args, @@ -989,7 +989,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, args=args, @@ -1036,7 +1036,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1075,7 +1075,7 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, args=args, @@ -1133,7 +1133,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1190,7 +1190,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, newfile="newfile_tmp.txt", @@ -1241,7 +1241,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1294,7 +1294,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, file_orig=file ) @@ -1342,7 +1342,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1393,7 +1393,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1445,7 +1445,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1496,7 +1496,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, files=files_list, @@ -1545,7 +1545,7 @@ def test_shell_cmd_inputspec_10_err(tmp_path): ) with pytest.raises(FileNotFoundError): - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec ) @@ -1580,7 +1580,7 @@ def test_shell_cmd_inputspec_11(tmp_path): input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellDef,)) output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutputs,)) - task = ShellTask( + task = ShellDef( name="echoMultiple", executable="touch", input_spec=input_spec, @@ -1656,7 +1656,7 @@ def template_function(inputs): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1700,7 +1700,7 @@ def test_shell_cmd_inputspec_with_iterable(): bases=(ShellDef,), ) - task = ShellTask(name="test", input_spec=input_spec, executable="test") + task = ShellDef(name="test", input_spec=input_spec, executable="test") for iterable_type 
in (list, tuple): task.definition.iterable_1 = iterable_type(range(3)) @@ -1750,7 +1750,7 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1812,7 +1812,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1890,7 +1890,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -1932,7 +1932,7 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -1967,7 +1967,7 @@ def test_shell_cmd_inputspec_typeval_1(): ) with pytest.raises(TypeError): - ShellTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + ShellDef(executable=cmd_exec, text="hello", input_spec=my_input_spec) def test_shell_cmd_inputspec_typeval_2(): @@ -1983,7 +1983,7 @@ def test_shell_cmd_inputspec_typeval_2(): ) with pytest.raises(TypeError): - ShellTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) + ShellDef(executable=cmd_exec, text="hello", input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) @@ -2005,7 +2005,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -2043,7 +2043,7 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -2089,7 +2089,7 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd_exec, input_spec=my_input_spec, @@ -2149,7 +2149,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -2202,7 +2202,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly", input_spec=my_input_spec, executable=wf.lzin.cmd, @@ -2249,7 +2249,7 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly", input_spec=my_input_spec, executable=wf.lzin.cmd, @@ -2327,7 +2327,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2335,7 +2335,7 @@ def test_wf_shell_cmd_3(plugin, tmp_path): ) ) wf.add( - ShellTask( + ShellDef( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2424,7 +2424,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2432,7 +2432,7 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): ) ) wf.add( - ShellTask( + ShellDef( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2519,7 +2519,7 @@ def 
test_wf_shell_cmd_state_1(plugin, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, @@ -2527,7 +2527,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): ) ) wf.add( - ShellTask( + ShellDef( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2617,14 +2617,14 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): ) wf.add( - ShellTask( + ShellDef( name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, ).split("args", args=wf.lzin.args) ) wf.add( - ShellTask( + ShellDef( name="shelly2", input_spec=my_input_spec2, executable=wf.lzin.cmd2, @@ -2665,7 +2665,7 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): fields=[("newfile", File, "newfile_tmp.txt")], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2685,7 +2685,7 @@ def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): fields=[("newfile", attr.ib(type=File, default="newfile_tmp.txt"))], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2704,7 +2704,7 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): fields=[("newfile", File, "newfile_tmp_.txt")], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2726,7 +2726,7 @@ def test_shell_cmd_outputspec_2(plugin, results_function, tmp_path): fields=[("newfile", File, "newfile_*.txt")], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2746,7 +2746,7 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): fields=[("newfile", File, "newfile_*K.txt")], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2768,7 +2768,7 @@ def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): fields=[("newfile", MultiOutputFile, "newfile_*.txt")], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2802,7 +2802,7 @@ def gather_output(field, output_dir): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2841,7 +2841,7 @@ def gather_output(executable, output_dir): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) @@ -2869,7 +2869,7 @@ def gather_output(executable, output_dir, ble): fields=[("newfile", attr.ib(type=File, metadata={"callable": gather_output}))], bases=(ShellOutputs,), ) - shelly = ShellTask(name="shelly", executable=cmd, output_spec=my_output_spec) + shelly = ShellDef(name="shelly", executable=cmd, output_spec=my_output_spec) with pytest.raises(AttributeError, match="ble"): shelly() @@ -2890,7 +2890,7 @@ def gather_output(executable, output_dir): newfile: MultiOutputFile = attr.ib(metadata={"callable": gather_output}) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"], output_spec=SpecInfo(name="Output", bases=(MyOutputDef,)), @@ -2930,7 +2930,7 @@ def 
test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, args=args, @@ -2963,7 +2963,7 @@ def test_shell_cmd_outputspec_6a(): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, args=args, output_spec=my_output_spec ) @@ -3033,7 +3033,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3109,7 +3109,7 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3178,7 +3178,7 @@ def get_stderr(stderr): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ).split("args", args=args) @@ -3206,7 +3206,7 @@ def test_shell_cmd_outputspec_8b_error(): ], bases=(ShellOutputs,), ) - shelly = ShellTask(name="shelly", executable=cmd, output_spec=my_output_spec).split( + shelly = ShellDef(name="shelly", executable=cmd, output_spec=my_output_spec).split( "args", args=args ) with pytest.raises(Exception) as e: @@ -3243,7 +3243,7 @@ def get_lowest_directory(directory_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, @@ -3304,7 +3304,7 @@ def get_lowest_directory(directory_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name=cmd, executable=cmd, input_spec=my_input_spec, @@ -3351,7 +3351,7 @@ def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, output_spec=my_output_spec, @@ -3383,7 +3383,7 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): fields=[("newfile", File, "newfile_tmp.txt")], bases=(ShellOutputs,), ) - wf.add(ShellTask(name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec)) + wf.add(ShellDef(name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec)) wf.set_output( [("stdout", wf.shelly.lzout.stdout), ("newfile", wf.shelly.lzout.newfile)] ) @@ -3436,7 +3436,7 @@ def test_shell_cmd_inputspec_outputspec_1(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3490,7 +3490,7 @@ def test_shell_cmd_inputspec_outputspec_1a(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3551,7 +3551,7 @@ def test_shell_cmd_inputspec_outputspec_2(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3618,7 +3618,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3690,7 +3690,7 @@ def test_shell_cmd_inputspec_outputspec_3(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3751,7 +3751,7 @@ def test_shell_cmd_inputspec_outputspec_3a(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3815,7 +3815,7 @@ 
def test_shell_cmd_inputspec_outputspec_4(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3870,7 +3870,7 @@ def test_shell_cmd_inputspec_outputspec_4a(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3925,7 +3925,7 @@ def test_shell_cmd_inputspec_outputspec_5(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -3979,7 +3979,7 @@ def test_shell_cmd_inputspec_outputspec_5a(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4033,7 +4033,7 @@ def test_shell_cmd_inputspec_outputspec_5b(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4082,7 +4082,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): ], bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( name="shelly", executable=cmd, input_spec=my_input_spec, @@ -4329,7 +4329,7 @@ def change_name(file): in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec ) out_file = shelly.output_dir / "test_brain.nii.gz" @@ -4376,7 +4376,7 @@ def test_shell_cmd_optional_output_file1(tmp_path): bases=(ShellDef,), ) - my_cp = ShellTask( + my_cp = ShellDef( name="my_cp", executable="cp", input_spec=my_cp_spec, @@ -4414,7 +4414,7 @@ def test_shell_cmd_optional_output_file2(tmp_path): bases=(ShellDef,), ) - my_cp = ShellTask( + my_cp = ShellDef( name="my_cp", executable="cp", input_spec=my_cp_spec, @@ -4478,7 +4478,7 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="echo", input_spec=input_spec, @@ -4540,7 +4540,7 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4607,7 +4607,7 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4675,7 +4675,7 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4728,7 +4728,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="echo", input_spec=input_spec, @@ -4782,7 +4782,7 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): bases=(ShellOutputs,), ) - shelly = ShellTask( + shelly = ShellDef( cache_dir=tmp_path, executable="touch", input_spec=input_spec, @@ -4855,11 +4855,11 @@ def formatter_1(inputs): return f"-t [{inputs['in1']}, {inputs['in2']}]" input_spec = spec_info(formatter_1) - shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" # testing that the formatter can overwrite a provided value for together. 
- shelly = ShellTask( + shelly = ShellDef( executable="exec", input_spec=input_spec, in1="i1", @@ -4875,7 +4875,7 @@ def formatter_2(in1, in2): input_spec = spec_info(formatter_2) - shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" def formatter_3(in1, in3): @@ -4884,7 +4884,7 @@ def formatter_3(in1, in3): input_spec = spec_info(formatter_3) - shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") with pytest.raises(Exception) as excinfo: shelly.cmdline assert ( @@ -4900,7 +4900,7 @@ def formatter_5(field): input_spec = spec_info(formatter_5) - shelly = ShellTask( + shelly = ShellDef( executable="exec", input_spec=input_spec, in1="i1", @@ -4917,7 +4917,7 @@ def formatter_4(field): input_spec = spec_info(formatter_4) - shelly = ShellTask(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") assert shelly.cmdline == "exec" @@ -4969,7 +4969,7 @@ def formatter_1(in1, in2): input_spec = spec_info(formatter_1) in1 = ["in11", "in12"] - shelly = ShellTask( + shelly = ShellDef( name="f", executable="executable", input_spec=input_spec, in2="in2" ).split("in1", in1=in1) assert shelly is not None @@ -5016,7 +5016,7 @@ def test_shellcommand_error_msg(tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( name="err_msg", executable=str(script_path), input_spec=input_spec, in1="hello" ) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 0a54ce83a2..272231715b 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -2,8 +2,6 @@ from pathlib import Path import attr import pytest - -from ..task import ShellTask from pydra.engine.specs import ShellOutputs, ShellDef from fileformats.generic import File from pydra.design import shell @@ -11,14 +9,14 @@ def test_shell_cmd_execargs_1(): # separate command into exec + args - shelly = ShellTask(executable="executable", args="arg") + shelly = ShellDef(executable="executable", args="arg") assert shelly.cmdline == "executable arg" assert shelly.name == "ShellTask_noname" def test_shell_cmd_execargs_2(): # separate command into exec + args - shelly = ShellTask(executable=["cmd_1", "cmd_2"], args="arg") + shelly = ShellDef(executable=["cmd_1", "cmd_2"], args="arg") assert shelly.cmdline == "cmd_1 cmd_2 arg" @@ -38,7 +36,7 @@ def test_shell_cmd_inputs_1(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", args="arg", inpA="inp1", input_spec=my_input_spec ) assert shelly.cmdline == "executable inp1 arg" @@ -52,7 +50,7 @@ def test_shell_cmd_inputs_1a(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", args="arg", inpA="inpNone1", input_spec=my_input_spec ) # inp1 should be the first one after executable @@ -76,7 +74,7 @@ def test_shell_cmd_inputs_1b(): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( executable="executable", args="arg", inpA="inp-1", input_spec=my_input_spec ) # inp1 should be last before arg @@ -99,7 +97,7 @@ def test_shell_cmd_inputs_1_st(): bases=(ShellDef,), ) - ShellTask( + ShellDef( name="shelly", executable="executable", args="arg", @@ -134,7 +132,7 
@@ def test_shell_cmd_inputs_2(): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpB="inp1", inpA="inp2", input_spec=my_input_spec ) assert shelly.cmdline == "executable inp1 inp2" @@ -152,7 +150,7 @@ def test_shell_cmd_inputs_2a(): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA="inpNone1", inpB="inpNone2", @@ -185,7 +183,7 @@ def test_shell_cmd_inputs_2_err(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA="inp1", inpB="inp2", input_spec=my_input_spec ) with pytest.raises(Exception) as e: @@ -218,7 +216,7 @@ def test_shell_cmd_inputs_2_noerr(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA="inp1", input_spec=my_input_spec) shelly.cmdline @@ -247,7 +245,7 @@ def test_shell_cmd_inputs_3(): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA="inp1", inpB="inp-1", @@ -274,7 +272,7 @@ def test_shell_cmd_inputs_argstr_1(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA="inp1", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA="inp1", input_spec=my_input_spec) # flag used before inp1 assert shelly.cmdline == "executable -v inp1" @@ -296,7 +294,7 @@ def test_shell_cmd_inputs_argstr_2(): ) # separate command into exec + args - shelly = ShellTask( + shelly = ShellDef( executable="executable", args="arg", inpA=True, input_spec=my_input_spec ) # a flag is used without any additional argument @@ -319,7 +317,7 @@ def test_shell_cmd_inputs_list_1(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) # multiple elements @@ -342,7 +340,7 @@ def test_shell_cmd_inputs_list_2(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) assert shelly.cmdline == "executable -v el_1 el_2 el_3" @@ -364,7 +362,7 @@ def test_shell_cmd_inputs_list_3(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec ) # a flag is repeated @@ -392,7 +390,7 @@ def test_shell_cmd_inputs_list_sep_1(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -422,7 +420,7 @@ def test_shell_cmd_inputs_list_sep_2(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -452,7 +450,7 @@ def test_shell_cmd_inputs_list_sep_2a(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -482,7 +480,7 @@ def test_shell_cmd_inputs_list_sep_3(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -512,7 +510,7 @@ def test_shell_cmd_inputs_list_sep_3a(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec, @@ -542,7 +540,7 @@ def test_shell_cmd_inputs_sep_4(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA=["aaa"], 
input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA=["aaa"], input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -567,7 +565,7 @@ def test_shell_cmd_inputs_sep_4a(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA="aaa", input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -591,7 +589,7 @@ def test_shell_cmd_inputs_format_1(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA="aaa", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA="aaa", input_spec=my_input_spec) assert shelly.cmdline == "executable -v aaa" @@ -615,7 +613,7 @@ def test_shell_cmd_inputs_format_2(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", inpA=["el_1", "el_2"], input_spec=my_input_spec, @@ -643,7 +641,7 @@ def test_shell_cmd_inputs_format_3(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", inpA=0.007, input_spec=my_input_spec) + shelly = ShellDef(executable="executable", inpA=0.007, input_spec=my_input_spec) assert shelly.cmdline == "executable -v 0.00700" @@ -668,7 +666,7 @@ def test_shell_cmd_inputs_mandatory_1(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", input_spec=my_input_spec) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) @@ -711,7 +709,7 @@ def test_shell_cmd_inputs_not_given_1(): ], bases=(ShellDef,), ) - shelly = ShellTask(name="shelly", executable="executable", input_spec=my_input_spec) + shelly = ShellDef(name="shelly", executable="executable", input_spec=my_input_spec) shelly.definition.arg2 = "argument2" @@ -751,7 +749,7 @@ def test_shell_cmd_inputs_template_1(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -790,7 +788,7 @@ def test_shell_cmd_inputs_template_1a(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") # outA has no argstr in metadata, so it's not a part of the command line assert shelly.cmdline == "executable inpA" @@ -824,7 +822,7 @@ def test_shell_cmd_inputs_template_2(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec) + shelly = ShellDef(executable="executable", input_spec=my_input_spec) # inpB not in the inputs, so no outB in the command line assert shelly.cmdline == "executable" # checking if outB in the output fields @@ -902,7 +900,7 @@ def test_shell_cmd_inputs_template_3(tmp_path): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA=inpA, inpB=inpB ) # using syntax from the outAB field @@ -982,7 +980,7 @@ def test_shell_cmd_inputs_template_3a(): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", inpB="inpB" ) # using syntax from the outAB field @@ -1058,7 +1056,7 @@ def 
test_shell_cmd_inputs_template_4(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") # inpB is not provided so outB not in the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1085,7 +1083,7 @@ def test_shell_cmd_inputs_template_5_ex(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, outAB="outAB") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, outAB="outAB") with pytest.raises(Exception) as e: shelly.cmdline assert "read only" in str(e.value) @@ -1130,23 +1128,23 @@ def test_shell_cmd_inputs_template_6(): # no input for outA (and no default value), so the output is created whenever the # template can be formatted (the same way as for templates that has type=str) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # a string is provided for outA, so this should be used as the outA value - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" ) assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True ) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False ) assert shelly.cmdline == "executable inpA" @@ -1190,23 +1188,23 @@ def test_shell_cmd_inputs_template_6a(): ) # no input for outA, but default is False, so the outA shouldn't be used - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == "executable inpA" # a string is provided for outA, so this should be used as the outA value - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" ) assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True ) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False ) assert shelly.cmdline == "executable inpA" @@ -1249,9 +1247,7 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1298,9 +1294,7 @@ def 
test_shell_cmd_inputs_template_7a(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1347,9 +1341,7 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1393,9 +1385,7 @@ def test_shell_cmd_inputs_template_8(tmp_path: Path): inpA_file = tmp_path / "a_file.t" inpA_file.write_text("content") - shelly = ShellTask( - executable="executable", input_spec=my_input_spec, inpA=inpA_file - ) + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) # outA should be formatted in a way that inpA extension is removed and the template extension is used assert ( @@ -1454,7 +1444,7 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpInt=3 ) @@ -1516,7 +1506,7 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpStr="hola" ) @@ -1581,7 +1571,7 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): inpFile_file = tmp_path / "inpFile.t" inpFile_file.write_text("content") - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA=inpA_file, @@ -1642,7 +1632,7 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA=inpA_file, @@ -1687,7 +1677,7 @@ def test_shell_cmd_inputs_template_10(): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA=3.3456) + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=3.3456) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" @@ -1737,7 +1727,7 @@ def test_shell_cmd_inputs_template_requires_1(): ) # When requirements are not met. - shelly = ShellTask(executable="cmd", input_spec=my_input_spec, in_file="in.file") + shelly = ShellDef(executable="cmd", input_spec=my_input_spec, in_file="in.file") assert "--tpl" not in shelly.cmdline # When requirements are met. 
@@ -1785,7 +1775,7 @@ def template_fun(inputs): bases=(ShellDef,), ) - shelly = ShellTask(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -1843,7 +1833,7 @@ def template_fun(inputs): bases=(ShellDef,), ) - shelly = ShellTask( + shelly = ShellDef( executable="executable", input_spec=my_input_spec, inpA="inpA", @@ -1889,7 +1879,7 @@ def test_shell_cmd_inputs_template_1_st(): ) inpA = ["inpA_1", "inpA_2"] - ShellTask( + ShellDef( name="f", executable="executable", input_spec=my_input_spec, @@ -2087,13 +2077,13 @@ def test_shell_cmd_inputs_denoise_image( my_input_file.write_text("content") # no input provided - shelly = ShellTask(executable="DenoiseImage", input_spec=my_input_spec) + shelly = ShellDef(executable="DenoiseImage", input_spec=my_input_spec) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) # input file name, noiseImage is not set, so using default value False - shelly = ShellTask( + shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2104,7 +2094,7 @@ def test_shell_cmd_inputs_denoise_image( ) # input file name, noiseImage is set to True, so template is used in the output - shelly = ShellTask( + shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2116,7 +2106,7 @@ def test_shell_cmd_inputs_denoise_image( ) # input file name and help_short - shelly = ShellTask( + shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, @@ -2136,7 +2126,7 @@ def test_shell_cmd_inputs_denoise_image( ] # adding image_dimensionality that has allowed_values [2, 3, 4] - shelly = ShellTask( + shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, @@ -2149,7 +2139,7 @@ def test_shell_cmd_inputs_denoise_image( # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised with pytest.raises(ValueError) as excinfo: - shelly = ShellTask( + shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, input_spec=my_input_spec, diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 0d8cd4cb07..8c21d44289 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -3,7 +3,7 @@ import pytest import attr -from ..task import ShellTask +from ..task import ShellDef from ..submitter import Submitter from ..specs import ShellOutputs, ShellDef from fileformats.generic import File @@ -30,7 +30,7 @@ def test_singularity_1_nosubm(tmp_path): """ cmd = "pwd" image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", executable=cmd, environment=Singularity(image=image), @@ -52,7 +52,7 @@ def test_singularity_2_nosubm(tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", executable=cmd, environment=Singularity(image=image), @@ -73,7 +73,7 @@ def test_singularity_2(plugin, tmp_path): cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", executable=cmd, environment=Singularity(image=image), @@ -97,7 +97,7 @@ def test_singularity_2a(plugin, tmp_path): cmd_args = ["hail", "pydra"] # separate command into exec + args 
image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", executable=cmd_exec, args=cmd_args, @@ -123,7 +123,7 @@ def test_singularity_st_1(plugin, tmp_path): """ cmd = ["pwd", "ls"] image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), cache_dir=tmp_path ).split("executable", executable=cmd) assert singu.state.splitter == "singu.executable" @@ -145,7 +145,7 @@ def test_singularity_st_2(tmp_path, n): """splitter over args (checking bigger splitters if slurm available)""" args_n = list(range(n)) image = "docker://alpine" - singu = ShellTask( + singu = ShellDef( name="singu", executable="echo", environment=Singularity(image=image), @@ -175,7 +175,7 @@ def test_singularity_outputspec_1(plugin, tmp_path): fields=[("newfile", File, "newfile_tmp.txt")], bases=(ShellOutputs,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -223,7 +223,7 @@ def test_singularity_inputspec_1(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -264,7 +264,7 @@ def test_singularity_inputspec_1a(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -321,7 +321,7 @@ def test_singularity_inputspec_2(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -381,7 +381,7 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -441,7 +441,7 @@ def test_singularity_inputspec_2a(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -498,7 +498,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -554,7 +554,7 @@ def test_singularity_inputspec_state_1(tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -604,7 +604,7 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): bases=(ShellDef,), ) - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=cmd, @@ -651,7 +651,7 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): wf.inputs.cmd = cmd wf.inputs.file = filename - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, @@ -706,7 +706,7 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) wf.inputs.cmd = cmd - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, @@ -764,7 +764,7 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): wf.inputs.cmd = cmd wf.inputs.file = filename - singu = ShellTask( + singu = ShellDef( name="singu", environment=Singularity(image=image), executable=wf.lzin.cmd, diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 95dc0c3ccf..efa993ab6a 100644 --- a/pydra/engine/tests/test_task.py +++ 
b/pydra/engine/tests/test_task.py @@ -1,26 +1,26 @@ import typing as ty import os import sys -import attr +import attrs +import shutil import pytest import cloudpickle as cp from pathlib import Path import json import glob as glob -from pydra.design import python +from pydra.design import python, shell, workflow from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages -from ..task import AuditFlag, ShellTask -from pydra.engine.specs import argstr_formatting +from ..task import AuditFlag +from pydra.engine.specs import argstr_formatting, ShellDef, ShellOutputs from pydra.engine.helpers import list_fields, print_help +from pydra.engine.submitter import Submitter +from pydra.engine.core import Task from .utils import BasicWorkflow from pydra.utils import default_run_cache_dir from pydra.utils.typing import ( MultiInputObj, MultiOutputObj, ) -from ..specs import ( - ShellDef, -) from fileformats.generic import File from pydra.utils.hash import hash_function @@ -59,7 +59,7 @@ def test_numpy(): def test_checksum(): nn = FunAddTwo(a=3) assert ( - nn.checksum + nn._checksum == "PythonTask_abb4e7cc03b13d0e73884b87d142ed5deae6a312275187a9d8df54407317d7d3" ) @@ -407,11 +407,9 @@ def test_annotated_func_multreturn_exception(): but three element provided in the definition - should raise an error """ - @python.define + @python.define(outputs={"fractional": float, "integer": int, "who_knows": int}) def TestFunc( a: float, - ) -> ty.NamedTuple( - "Output", [("fractional", float), ("integer", int), ("who_knows", int)] ): import math @@ -423,7 +421,11 @@ def TestFunc( assert "expected 3 elements" in str(excinfo.value) -def test_halfannotated_func(): +def test_halfannotated_func(tmp_path): + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + @python.define def TestFunc(a, b) -> int: return a + b @@ -437,34 +439,36 @@ def TestFunc(a, b) -> int: assert getattr(funky, "function") is not None assert set(f.name for f in list_fields(funky.Outputs)) == {"out"} - outputs = funky() + outputs = funky(cache_dir=cache_dir) assert hasattr(outputs, "out") assert outputs.out == 30 - assert os.path.exists( - default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" - ) + assert Path(cache_dir / f"python-{funky._hash}" / "_result.pklz").exists() - funky.result() # should not recompute + funky(cache_dir=cache_dir) # should not recompute funky.a = 11 - assert funky.result() is None - outputs = funky() + assert not Path(cache_dir / f"python-{funky._hash}").exists() + outputs = funky(cache_dir=cache_dir) assert outputs.out == 31 - help = funky.help(returnhelp=True) + help = print_help(funky) assert help == [ "Help for TestFunc", "Input Parameters:", - "- a: _empty", - "- b: _empty", + "- a: Any", + "- b: Any", "Output Parameters:", "- out: int", ] -def test_halfannotated_func_multreturn(): - @python.define - def TestFunc(a, b) -> (int, int): +def test_halfannotated_func_multreturn(tmp_path): + + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + + @python.define(outputs=["out1", "out2"]) + def TestFunc(a, b) -> tuple[int, int]: return a + 1, b + 1 funky = TestFunc(a=10, b=20) @@ -476,26 +480,24 @@ def TestFunc(a, b) -> (int, int): assert getattr(funky, "function") is not None assert set(f.name for f in list_fields(funky.Outputs)) == {"out1", "out2"} - outputs = funky() + outputs = funky(cache_dir=cache_dir) assert hasattr(outputs, "out1") assert outputs.out1 == 11 - assert os.path.exists( - default_run_cache_dir / f"python-{funky._hash}" / "_result.pklz" - ) + assert Path(cache_dir / 
f"python-{funky._hash}" / "_result.pklz").exists() - funky.result() # should not recompute + funky(cache_dir=cache_dir) # should not recompute funky.a = 11 - assert funky.result() is None - outputs = funky() + assert not Path(cache_dir / f"python-{funky._hash}" / "_result.pklz").exists() + outputs = funky(cache_dir=cache_dir) assert outputs.out1 == 12 - help = funky.help(returnhelp=True) + help = print_help(funky) assert help == [ "Help for TestFunc", "Input Parameters:", - "- a: _empty", - "- b: _empty", + "- a: Any", + "- b: Any", "Output Parameters:", "- out1: int", "- out2: int", @@ -504,38 +506,37 @@ def TestFunc(a, b) -> (int, int): def test_notannotated_func(): @python.define - def no_annots(c, d): + def NoAnnots(c, d): return c + d - natask = no_annots(c=17, d=3.2) - assert hasattr(natask, "c") - assert hasattr(natask, "d") - assert hasattr(natask, "function") + no_annots = NoAnnots(c=17, d=3.2) + assert hasattr(no_annots, "c") + assert hasattr(no_annots, "d") + assert hasattr(no_annots, "function") - result = natask._run() - assert hasattr(result, "output") + outputs = no_annots() assert hasattr(outputs, "out") assert outputs.out == 20.2 def test_notannotated_func_returnlist(): @python.define - def no_annots(c, d): + def NoAnnots(c, d): return [c, d] - natask = no_annots(c=17, d=3.2) - result = natask._run() + no_annots = NoAnnots(c=17, d=3.2) + outputs = no_annots() assert hasattr(outputs, "out") assert outputs.out == [17, 3.2] def test_halfannotated_func_multrun_returnlist(): - @python.define - def no_annots(c, d) -> (list, float): + @python.define(outputs=["out1", "out2"]) + def NoAnnots(c, d) -> tuple[list, float]: return [c, d], c + d - natask = no_annots(c=17, d=3.2) - result = natask._run() + no_annots = NoAnnots(c=17, d=3.2) + outputs = no_annots() assert hasattr(outputs, "out1") assert hasattr(outputs, "out2") @@ -549,16 +550,15 @@ def test_notannotated_func_multreturn(): """ @python.define - def no_annots(c, d): + def NoAnnots(c, d): return c + d, c - d - natask = no_annots(c=17, d=3.2) - assert hasattr(natask, "c") - assert hasattr(natask, "d") - assert hasattr(natask, "function") + no_annots = NoAnnots(c=17, d=3.2) + assert hasattr(no_annots, "c") + assert hasattr(no_annots, "d") + assert hasattr(no_annots, "function") - result = natask._run() - assert hasattr(result, "output") + outputs = no_annots() assert hasattr(outputs, "out") assert outputs.out == (20.2, 13.8) @@ -566,18 +566,12 @@ def no_annots(c, d): def test_input_spec_func_1(): """the function w/o annotated, but input_spec is used""" - @python.define + @python.define(inputs={"a": python.arg(type=float, help="input a")}) def TestFunc(a): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=float, metadata={"help": "input a"}))], - bases=(FunctionDef,), - ) - - funky = TestFunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_1a_except(): @@ -585,17 +579,12 @@ def test_input_spec_func_1a_except(): a TypeError is raised (float is provided instead of int) """ - @python.define + @python.define(inputs={"a": python.arg(type=int, help="input a")}) def TestFunc(a): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=int, metadata={"help": "input a"}))], - bases=(FunctionDef,), - ) with pytest.raises(TypeError): - TestFunc(a=3.5, input_spec=my_input_spec) + TestFunc(a=3.5) def test_input_spec_func_1b_except(): @@ -603,22 +592,13 @@ def 
test_input_spec_func_1b_except(): metadata checks raise an error """ - @python.define - def TestFunc(a): - return a + with pytest.raises( + TypeError, match="got an unexpected keyword argument 'position'" + ): - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib(type=float, metadata={"position": 1, "help": "input a"}), - ) - ], - bases=(FunctionDef,), - ) - with pytest.raises(AttributeError, match="only these keys are supported"): - TestFunc(a=3.5, input_spec=my_input_spec) + @python.define(inputs={"a": python.arg(type=float, position=1, help="input a")}) + def TestFunc(a): + return a def test_input_spec_func_1d_except(): @@ -630,9 +610,8 @@ def test_input_spec_func_1d_except(): def TestFunc(a): return a - my_input_spec = SpecInfo(name="Input", fields=[], bases=(FunctionDef,)) - funky = TestFunc(a=3.5, input_spec=my_input_spec) - with pytest.raises(TypeError, match="missing 1 required positional argument"): + funky = TestFunc() + with pytest.raises(ValueError, match="Mandatory field 'a' is not set"): funky() @@ -641,18 +620,12 @@ def test_input_spec_func_2(): input_spec changes the type of the input (so error is not raised) """ - @python.define + @python.define(inputs={"a": python.arg(type=float, help="input a")}) def TestFunc(a: int): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=float, metadata={"help": "input a"}))], - bases=(FunctionDef,), - ) - - funky = TestFunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_2a(): @@ -661,18 +634,12 @@ def test_input_spec_func_2a(): using the shorter syntax """ - @python.define + @python.define(inputs={"a": python.arg(type=float, help="input a")}) def TestFunc(a: int): return a - my_input_spec = SpecInfo( - name="Input", - fields=[("a", float, {"help": "input a"})], - bases=(FunctionDef,), - ) - - funky = TestFunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky, "a") == 3.5 + funky = TestFunc(a=3.5) + assert funky.a == 3.5 def test_input_spec_func_3(): @@ -680,26 +647,20 @@ def test_input_spec_func_3(): additional keys (allowed_values) are used in metadata """ - @python.define - def TestFunc(a): - return a - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, - metadata={"help": "input a", "allowed_values": [0, 1, 2]}, - ), + @python.define( + inputs={ + "a": python.arg( + type=int, + help="input a", + allowed_values=[0, 1, 2], ) - ], - bases=(FunctionDef,), + } ) + def TestFunc(a): + return a - funky = TestFunc(a=2, input_spec=my_input_spec) - assert getattr(funky, "a") == 2 + funky = TestFunc(a=2) + assert funky.a == 2 def test_input_spec_func_3a_except(): @@ -707,26 +668,20 @@ def test_input_spec_func_3a_except(): allowed_values is used in metadata and the ValueError is raised """ - @python.define - def TestFunc(a): - return a - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib( - type=int, - metadata={"help": "input a", "allowed_values": [0, 1, 2]}, - ), + @python.define( + inputs={ + "a": python.arg( + type=int, + help="input a", + allowed_values=[0, 1, 2], ) - ], - bases=(FunctionDef,), + } ) + def TestFunc(a): + return a with pytest.raises(ValueError, match="value of a has to be"): - TestFunc(a=3, input_spec=my_input_spec) + TestFunc(a=3) def test_input_spec_func_4(): @@ -734,27 +689,17 @@ def test_input_spec_func_4(): but b is set as mandatory in the input_spec, so error is raised if not provided """ - @python.define - 
def TestFunc(a, b=1): - return a + b - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib(type=int, metadata={"help": "input a", "mandatory": True}), - ), - ( - "b", - attr.ib(type=int, metadata={"help": "input b", "mandatory": True}), - ), - ], - bases=(FunctionDef,), + @python.define( + inputs={ + "a": python.arg(type=int, help="input a"), + "b": python.arg(type=int, help="input b"), + } ) + def TestFunc(a, b): + return a + b - funky = TestFunc(a=2, input_spec=my_input_spec) - with pytest.raises(Exception, match="b is mandatory"): + funky = TestFunc(a=2) + with pytest.raises(Exception, match="Mandatory field 'b' is not set"): funky() @@ -763,23 +708,16 @@ def test_input_spec_func_4a(): has a different default value, so value from the function is overwritten """ - @python.define + @python.define( + inputs={ + "a": python.arg(type=int, help="input a"), + "b": python.arg(type=int, help="input b", default=10), + } + ) def TestFunc(a, b=1): return a + b - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "a", - attr.ib(type=int, metadata={"help": "input a", "mandatory": True}), - ), - ("b", attr.ib(type=int, default=10, metadata={"help": "input b"})), - ], - bases=(FunctionDef,), - ) - - funky = TestFunc(a=2, input_spec=my_input_spec) + funky = TestFunc(a=2) outputs = funky() assert outputs.out == 12 @@ -789,18 +727,12 @@ def test_input_spec_func_5(): a single value is provided and should be converted to a list """ - @python.define + @python.define(inputs={"a": python.arg(type=MultiInputObj, help="input a")}) def TestFunc(a): return len(a) - my_input_spec = SpecInfo( - name="Input", - fields=[("a", attr.ib(type=MultiInputObj, metadata={"help": "input a"}))], - bases=(FunctionDef,), - ) - - funky = TestFunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky, "a") == MultiInputObj([3.5]) + funky = TestFunc(a=3.5) + assert funky.a == MultiInputObj([3.5]) outputs = funky() assert outputs.out == 1 @@ -808,17 +740,11 @@ def TestFunc(a): def test_output_spec_func_1(): """the function w/o annotated, but output_spec is used""" - @python.define + @python.define(outputs={"out1": python.out(type=float, help="output")}) def TestFunc(a): return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help": "output"}))], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) outputs = funky() assert outputs.out1 == 3.5 @@ -828,17 +754,11 @@ def test_output_spec_func_1a_except(): float returned instead of int - TypeError """ - @python.define + @python.define(outputs={"out1": python.out(type=int, help="output")}) def TestFunc(a): return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", attr.ib(type=int, metadata={"help": "output"}))], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) with pytest.raises(TypeError): funky() @@ -848,17 +768,11 @@ def test_output_spec_func_2(): output_spec changes the type of the output (so error is not raised) """ - @python.define + @python.define(outputs={"out1": python.out(type=float, help="output")}) def TestFunc(a) -> int: return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", attr.ib(type=float, metadata={"help": "output"}))], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) outputs = funky() assert outputs.out1 == 3.5 @@ -869,17 +783,11 @@ def test_output_spec_func_2a(): using a shorter syntax """ 
- @python.define + @python.define(outputs={"out1": python.out(type=float, help="output")}) def TestFunc(a) -> int: return a - my_output_spec = SpecInfo( - name="Output", - fields=[("out1", float, {"help": "output"})], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) outputs = funky() assert outputs.out1 == 3.5 @@ -889,22 +797,11 @@ def test_output_spec_func_3(): MultiOutputObj is used, output is a 2-el list, so converter doesn't do anything """ - @python.define + @python.define(outputs={"out_list": python.out(type=MultiOutputObj, help="output")}) def TestFunc(a, b): return [a, b] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib(type=MultiOutputObj, metadata={"help": "output"}), - ) - ], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, b=1, output_spec=my_output_spec) + funky = TestFunc(a=3.5, b=1) outputs = funky() assert outputs.out_list == [3.5, 1] @@ -914,24 +811,13 @@ def test_output_spec_func_4(): MultiOutputObj is used, output is a 1el list, so converter return the element """ - @python.define + @python.define(outputs={"out_list": python.out(type=MultiOutputObj, help="output")}) def TestFunc(a): return [a] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1el", - attr.ib(type=MultiOutputObj, metadata={"help": "output"}), - ) - ], - bases=(BaseDef,), - ) - - funky = TestFunc(a=3.5, output_spec=my_output_spec) + funky = TestFunc(a=3.5) outputs = funky() - assert outputs.out_1el == 3.5 + assert outputs.out_list == 3.5 def test_exception_func(): @@ -947,10 +833,10 @@ def test_result_none_1(): """checking if None is properly returned as the result""" @python.define - def fun_none(x): + def FunNone(x): return None - task = fun_none(name="none", x=3) + task = FunNone(x=3) outputs = task() assert outputs.out is None @@ -958,11 +844,11 @@ def fun_none(x): def test_result_none_2(): """checking if None is properly set for all outputs""" - @python.define - def fun_none(x) -> (ty.Any, ty.Any): - return None + @python.define(outputs=["out1", "out2"]) + def FunNone(x) -> tuple[ty.Any, ty.Any]: + return None # Do we actually want this behaviour? 
- task = fun_none(name="none", x=3) + task = FunNone(x=3) outputs = task() assert outputs.out1 is None assert outputs.out2 is None @@ -971,38 +857,40 @@ def fun_none(x) -> (ty.Any, ty.Any): def test_audit_prov( tmpdir, ): - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # saving the audit message into the file - funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=2) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) # this should be the default loctaion - message_path = tmpdir / funky.checksum / "messages" - assert (tmpdir / funky.checksum / "messages").exists() + message_path = tmpdir / funky._checksum / "messages" + assert (tmpdir / funky._checksum / "messages").exists() - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() def test_audit_task(tmpdir): - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b from glob import glob - funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) - funky.cache_dir = tmpdir - funky() - message_path = tmpdir / funky.checksum / "messages" + funky = TestFunc(a=2) + funky( + cache_dir=tmpdir, + audit_flags=AuditFlag.PROV, + messengers=FileMessenger(), + name="TestFunc", + ) + message_path = tmpdir / funky._checksum / "messages" for file in glob(str(message_path) + "/*.jsonld"): with open(file) as f: @@ -1021,20 +909,19 @@ def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] def test_audit_shellcommandtask(tmpdir): - args = "-l" - shelly = ShellTask( - name="shelly", - executable="ls", - args=args, - audit_flags=AuditFlag.PROV, - messengers=FileMessenger(), - ) + Shelly = shell.define("ls -l") from glob import glob - shelly.cache_dir = tmpdir - shelly() - message_path = tmpdir / shelly.checksum / "messages" + shelly = Shelly() + + shelly( + cache_dir=tmpdir, + audit_flags=AuditFlag.PROV, + messengers=FileMessenger(), + name="shelly", + ) + message_path = tmpdir / shelly._checksum / "messages" # go through each jsonld file in message_path and check if the label field exists command_content = [] @@ -1080,48 +967,33 @@ def test_audit_shellcommandtask_file(tmp_path): file_in_2 = File(tmp_path / "test2.txt") test_file_hash = hash_function(file_in) test_file_hash_2 = hash_function(file_in_2) - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "text", - "mandatory": True, - }, - ), + Shelly = shell.define( + cmd, + inputs={ + "in_file": shell.arg( + type=File, + position=1, + argstr="", + help="text", ), - ( - "in_file_2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help": "text", - "mandatory": True, - }, - ), + 
"in_file_2": shell.arg( + type=File, + position=2, + argstr="", + help="text", ), - ], - bases=(ShellDef,), + }, ) - shelly = ShellTask( - name="shelly", + shelly = Shelly( in_file=file_in, in_file_2=file_in_2, - input_spec=my_input_spec, - executable=cmd, + ) + shelly( + cache_dir=tmp_path, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - shelly.cache_dir = tmp_path - results = shelly() - message_path = tmp_path / shelly.checksum / "messages" + message_path = tmp_path / shelly._hash / "messages" for file in glob.glob(str(message_path) + "/*.jsonld"): with open(file) as x: data = json.load(x) @@ -1142,20 +1014,19 @@ def test_audit_shellcommandtask_version(tmpdir): "utf-8" ) version_cmd = version_cmd.splitlines()[0] - cmd = "less" - shelly = ShellTask( + cmd = "less test_task.py" + Shelly = shell.define(cmd) + shelly = Shelly() + + import glob + + shelly( + cache_dir=tmpdir, name="shelly", - executable=cmd, - args="test_task.py", audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - - import glob - - shelly.cache_dir = tmpdir - shelly() - message_path = tmpdir / shelly.checksum / "messages" + message_path = tmpdir / shelly._checksum / "messages" # go through each jsonld file in message_path and check if the label field exists version_content = [] for file in glob.glob(str(message_path) + "/*.jsonld"): @@ -1173,27 +1044,32 @@ def test_audit_prov_messdir_1( ): """customized messenger dir""" - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # saving the audit message into the file - funky = TestFunc(a=2, audit_flags=AuditFlag.PROV, messengers=FileMessenger()) + funky = TestFunc(a=2) # user defined path - message_path = tmpdir / funky.checksum / "my_messages" - funky.cache_dir = tmpdir + message_path = tmpdir / funky._checksum / "my_messages" # providing messenger_dir for audit - funky.audit.messenger_args = dict(message_dir=message_path) - funky() - assert (tmpdir / funky.checksum / "my_messages").exists() + funky_task = Task( + definition=funky, + submitter=Submitter( + cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger() + ), + name="funky", + ) + funky_task.audit.messenger_args = dict(message_dir=message_path) + funky_task.run() + assert (tmpdir / funky._checksum / "my_messages").exists() - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() def test_audit_prov_messdir_2( @@ -1201,27 +1077,25 @@ def test_audit_prov_messdir_2( ): """customized messenger dir in init""" - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b # printing the audit message - funky = TestFunc(a=1, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) - funky.cache_dir = tmpdir - funky() + funky = TestFunc(a=1) + funky(cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=PrintMessenger()) # user defined path (doesn't 
depend on checksum, can be defined before init) message_path = tmpdir / "my_messages" # saving the audit message into the file - funky = TestFunc( - a=2, + funky = TestFunc(a=2) + # providing messenger_dir for audit + funky( + cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), messenger_args=dict(message_dir=message_path), ) - funky.cache_dir = tmpdir - # providing messenger_dir for audit - funky() assert (tmpdir / "my_messages").exists() collect_messages(tmpdir, message_path, ld_op="compact") @@ -1233,50 +1107,55 @@ def test_audit_prov_wf( ): """FileMessenger for wf""" - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b - wf = Workflow( + @workflow.define + def Workflow(x: int): + test_func = workflow.add(TestFunc(a=x)) + return test_func.out + + wf = Workflow(x=2) + + wf( name="wf", - input_spec=["x"], cache_dir=tmpdir, audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - wf.add(TestFunc(name="TestFunc", a=wf.lzin.x)) - wf.set_output([("out", wf.TestFunc.lzout.out)]) - wf.x = 2 - - wf(plugin="cf") # default path - message_path = tmpdir / wf.checksum / "messages" + message_path = tmpdir / wf._checksum / "messages" assert message_path.exists() - collect_messages(tmpdir / wf.checksum, message_path, ld_op="compact") - assert (tmpdir / wf.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / wf._checksum, message_path, ld_op="compact") + assert (tmpdir / wf._checksum / "messages.jsonld").exists() def test_audit_all( tmpdir, ): - @python.define - def TestFunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): + @python.define(outputs={"out": float}) + def TestFunc(a: int, b: float = 0.1): return a + b - funky = TestFunc(a=2, audit_flags=AuditFlag.ALL, messengers=FileMessenger()) - message_path = tmpdir / funky.checksum / "messages" - funky.cache_dir = tmpdir - funky.audit.messenger_args = dict(message_dir=message_path) - funky() + funky = TestFunc(a=2) + message_path = tmpdir / funky._checksum / "messages" + + funky( + cache_dir=tmpdir, + audit_flags=AuditFlag.ALL, + messengers=FileMessenger(), + messenger_args=dict(message_dir=message_path), + ) from glob import glob - assert len(glob(str(tmpdir / funky.checksum / "proc*.log"))) == 1 + assert len(glob(str(tmpdir / funky._checksum / "proc*.log"))) == 1 assert len(glob(str(message_path / "*.jsonld"))) == 7 # commented out to speed up testing - collect_messages(tmpdir / funky.checksum, message_path, ld_op="compact") - assert (tmpdir / funky.checksum / "messages.jsonld").exists() + collect_messages(tmpdir / funky._checksum, message_path, ld_op="compact") + assert (tmpdir / funky._checksum / "messages.jsonld").exists() @no_win @@ -1284,14 +1163,18 @@ def test_shell_cmd(tmpdir): cmd = ["echo", "hail", "pydra"] # all args given as executable - shelly = ShellTask(name="shelly", executable=cmd) + Shelly = shell.define(" ".join(cmd)) + shelly = Shelly() assert shelly.cmdline == " ".join(cmd) outputs = shelly() assert outputs.stdout == " ".join(cmd[1:]) + "\n" # separate command into exec + args - shelly = ShellTask(executable=cmd[0], args=cmd[1:]) - assert shelly.definition.executable == "echo" + Shelly = shell.define( + cmd[0], inputs=[shell.arg(name=a, default=a) for a in cmd[1:]] + ) + shelly = Shelly() + assert shelly.executable == "echo" assert shelly.cmdline == " ".join(cmd) outputs = shelly() assert outputs.return_code == 0 @@ -1303,23 
+1186,20 @@ def test_functask_callable(tmpdir): foo = FunAddTwo(a=1) outputs = foo() assert outputs.out == 3 - assert foo.plugin is None # plugin bar = FunAddTwo(a=2) - outputs = bar(plugin="cf") + outputs = bar(worker="cf", cache_dir=tmpdir) assert outputs.out == 4 - assert bar.plugin is None - foo2 = FunAddTwo(a=3) - foo2.plugin = "cf" - outputs = foo2() - assert outputs.out == 5 - assert foo2.plugin == "cf" +def test_taskhooks_1(tmpdir: Path, capsys): + cache_dir = tmpdir / "cache" + cache_dir.mkdir() -def test_taskhooks_1(tmpdir, capsys): - foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a=1), submitter=Submitter(cache_dir=cache_dir), name="foo" + ) assert foo.hooks # ensure all hooks are defined for attr in ("pre_run", "post_run", "pre_run_task", "post_run_task"): @@ -1330,7 +1210,7 @@ def myhook(task, *args): print("I was called") foo.hooks.pre_run = myhook - foo() + foo.run() captured = capsys.readouterr() assert "I was called\n" in captured.out del captured @@ -1339,28 +1219,36 @@ def myhook(task, *args): with pytest.raises(AttributeError): foo.hooks.mid_run = myhook + # clear cache + shutil.rmtree(cache_dir) + cache_dir.mkdir() + # set all hooks foo.hooks.post_run = myhook foo.hooks.pre_run_task = myhook foo.hooks.post_run_task = myhook - foo.a = 2 # ensure not pre-cached - foo() + foo.run() captured = capsys.readouterr() assert captured.out.count("I was called\n") == 4 del captured # hooks are independent across tasks by default - bar = FunAddTwo(name="bar", a=3, cache_dir=tmpdir) + bar = Task( + definition=FunAddTwo(a=3), name="bar", submitter=Submitter(cache_dir=tmpdir) + ) assert bar.hooks is not foo.hooks # but can be shared across tasks bar.hooks = foo.hooks # and workflows - wf = BasicWorkflow() - wf.tmpdir = tmpdir - wf.hooks = bar.hooks - assert foo.hooks == bar.hooks == wf.hooks + wf_task = Task( + definition=BasicWorkflow(x=1), + submitter=Submitter(cache_dir=tmpdir, worker="cf"), + name="wf", + ) + wf_task.hooks = bar.hooks + assert foo.hooks == bar.hooks == wf_task.hooks - wf(plugin="cf") + wf_task.run() captured = capsys.readouterr() assert captured.out.count("I was called\n") == 4 del captured @@ -1374,7 +1262,9 @@ def myhook(task, *args): def test_taskhooks_2(tmpdir, capsys): """checking order of the hooks; using task's attributes""" - foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a=1), name="foo", submitter=Submitter(cache_dir=tmpdir) + ) def myhook_prerun(task, *args): print(f"i. 
prerun hook was called from {task.name}") @@ -1392,7 +1282,7 @@ def myhook_postrun(task, *args): foo.hooks.post_run = myhook_postrun foo.hooks.pre_run_task = myhook_prerun_task foo.hooks.post_run_task = myhook_postrun_task - foo() + foo.run() captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1405,17 +1295,19 @@ def myhook_postrun(task, *args): def test_taskhooks_3(tmpdir, capsys): """checking results in the post run hooks""" - foo = FunAddTwo(name="foo", a=1, cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a=1), name="foo", submitter=Submitter(cache_dir=tmpdir) + ) def myhook_postrun_task(task, result, *args): - print(f"postrun task hook, the result is {outputs.out}") + print(f"postrun task hook, the result is {result.outputs.out}") def myhook_postrun(task, result, *args): - print(f"postrun hook, the result is {outputs.out}") + print(f"postrun hook, the result is {result.outputs.out}") foo.hooks.post_run = myhook_postrun foo.hooks.post_run_task = myhook_postrun_task - foo() + foo.run() captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1426,7 +1318,9 @@ def myhook_postrun(task, result, *args): def test_taskhooks_4(tmpdir, capsys): """task raises an error: postrun task should be called, postrun shouldn't be called""" - foo = FunAddTwo(name="foo", a="one", cache_dir=tmpdir) + foo = Task( + definition=FunAddTwo(a="one"), name="foo", submitter=Submitter(cache_dir=tmpdir) + ) def myhook_postrun_task(task, result, *args): print(f"postrun task hook was called, result object is {result}") @@ -1454,13 +1348,17 @@ def test_traceback(tmpdir): """ @python.define - def fun_error(x): + def FunError(x): raise Exception("Error from the function") - task = fun_error(name="error", cache_dir=tmpdir).split("x", x=[3, 4]) + task = Task( + name="error", + definition=FunError().split("x", x=[3, 4]), + submitter=Submitter(cache_dir=tmpdir), + ) with pytest.raises(Exception, match="from the function") as exinfo: - task() + task.run() # getting error file from the error message error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] @@ -1471,7 +1369,7 @@ def fun_error(x): error_tb = cp.loads(error_file.read_bytes())["error message"] # the error traceback should be a list and should point to a specific line in the function assert isinstance(error_tb, list) - assert "in fun_error" in error_tb[-2] + assert "in FunError" in error_tb[-2] def test_traceback_wf(tmpdir): @@ -1481,15 +1379,17 @@ def test_traceback_wf(tmpdir): """ @python.define - def fun_error(x): + def FunError(x): raise Exception("Error from the function") - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir).split("x", x=[3, 4]) - wf.add(fun_error(name="error", x=wf.lzin.x)) - wf.set_output([("out", wf.error.lzout.out)]) + @workflow.define + def Workflow(x): + error = workflow.add(FunError(x=x), name="error") + return error.out + wf = Workflow().split("x", x=[3, 4]) with pytest.raises(Exception, match="Task error raised an error") as exinfo: - wf() + wf(worker="cf") # getting error file from the error message error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] @@ -1500,7 +1400,7 @@ def fun_error(x): error_tb = cp.loads(error_file.read_bytes())["error message"] # the error traceback should be a list and should point to a specific line in the function assert isinstance(error_tb, list) - assert "in fun_error" in error_tb[-2] + assert "in FunError" in error_tb[-2] def test_rerun_errored(tmpdir, capfd): @@ -1508,20 +1408,20 
@@ def test_rerun_errored(tmpdir, capfd): Only the errored tasks should be rerun""" @python.define - def pass_odds(x): + def PassOdds(x): if x % 2 == 0: - print(f"x%2 = {x % 2} (error)\n") + print(f"x={x} -> x%2 = {bool(x % 2)} (error)\n") raise Exception("even error") else: - print(f"x%2 = {x % 2}\n") + print(f"x={x} -> x%2 = {bool(x % 2)}\n") return x - task = pass_odds(name="pass_odds", cache_dir=tmpdir).split("x", x=[1, 2, 3, 4, 5]) + pass_odds = PassOdds().split("x", x=[1, 2, 3, 4, 5]) - with pytest.raises(Exception, match="even error"): - task() - with pytest.raises(Exception, match="even error"): - task() + with pytest.raises(Exception): + pass_odds(cache_dir=tmpdir, worker="cf") + with pytest.raises(Exception): + pass_odds(cache_dir=tmpdir, worker="cf") out, err = capfd.readouterr() stdout_lines = out.splitlines() @@ -1541,7 +1441,7 @@ def pass_odds(x): assert errors_found == 4 -@attr.s(auto_attribs=True) +@attrs.define(auto_attribs=True) class A: x: int @@ -1558,14 +1458,18 @@ def TestFunc(a: A): def test_argstr_formatting(): - @attr.define - class Inputs: + @shell.define + class Defn(ShellDef["Defn.Outputs"]): a1_field: str b2_field: float c3_field: ty.Dict[str, str] d4_field: ty.List[str] + executable = "dummy" + + class Outputs(ShellOutputs): + pass - inputs = Inputs("1", 2.0, {"c": "3"}, ["4"]) + inputs = Defn(a1_field="1", b2_field=2.0, c3_field={"c": "3"}, d4_field=["4"]) assert ( argstr_formatting( "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 9ea06ed60d..ff6a273bbf 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -9,6 +9,7 @@ import subprocess as sp import pytest from fileformats.generic import File +from pydra.engine.specs import ShellDef from ..submitter import Submitter from pydra.design import workflow, python @@ -34,18 +35,17 @@ ) -def result_no_submitter(shell_task, plugin=None): +def result_no_submitter(shell_def: ShellDef, plugin: str = None): """helper function to return result when running without submitter""" - return shell_task() + return shell_def(worker=plugin) -def result_submitter(shell_task, plugin): +def result_submitter(shell_def: ShellDef, plugin: str): """helper function to return result when running with submitter with specific plugin """ with Submitter(worker=plugin) as sub: - shell_task(submitter=sub) - return shell_task.result() + return sub(shell_def) dot_check = sp.run(["which", "dot"], stdout=sp.PIPE, stderr=sp.PIPE) @@ -276,7 +276,7 @@ def FunFileList(filename_list: ty.List[File]): @workflow.define(outputs=["out"]) def BasicWorkflow(x): - task1 = workflow.add(FunAddTwo(a=x, b=0)) + task1 = workflow.add(FunAddTwo(a=x)) task2 = workflow.add(FunAddVar(a=task1.out, b=2)) return task2.out diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index e129fb0c70..ab846ed427 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -48,7 +48,10 @@ def run(self, task: "Task[DefType]", rerun: bool = False) -> "Result": pass async def run_async(self, task: "Task[DefType]", rerun: bool = False) -> "Result": - return await task.run_async(rerun=rerun) + if task.is_async: + return await task.run_async(rerun=rerun) + else: + return task.run(rerun=rerun) def close(self): """Close this worker.""" diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index e559e371ad..12cfa74c78 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -1,6 +1,6 @@ from fileformats.generic import File, 
BinaryFile from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber -from pydra.engine.task import ShellTask +from pydra.engine.task import ShellDef from pydra.engine import specs from pydra.design import shell, python @@ -80,7 +80,7 @@ def OtherSpecificFuncTask(in_file: MyOtherFormatX) -> MyOtherFormatX: return in_file -class OtherSpecificShellTask(ShellTask): +class OtherSpecificShellTask(ShellDef): in_file: MyOtherFormatX = shell.arg( help="the input file", From 8bfbe8cd90278dbc762f12506e4c7c9c318d0559 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 11:18:25 +1100 Subject: [PATCH 194/342] updated fileformats dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 4f55cb8225..0f6a4cc75a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.14", + "fileformats >=0.15a1", "platformdirs >=2", ] license = { file = "LICENSE" } From 27a04048a4ddc65800b4bff5248128639abf420d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 18:48:00 +1100 Subject: [PATCH 195/342] fixed support for multi-input objs --- pydra/utils/typing.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 158dade6f0..d24d66fa43 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -279,6 +279,8 @@ def expand_and_coerce(obj, pattern: ty.Union[type, tuple]): if not isinstance(pattern, tuple): return coerce_basic(obj, pattern) origin, pattern_args = pattern + if origin == MultiInputObj: + return coerce_multi_input(obj, pattern_args) if origin in UNION_TYPES: return coerce_union(obj, pattern_args) if origin is type: @@ -331,6 +333,21 @@ def coerce_union(obj, pattern_args): + "\n\n".join(f"{a} -> {e}" for a, e in zip(pattern_args, reasons)) ) + def coerce_multi_input(obj, pattern_args): + # Attempt to coerce the object into arg type of the MultiInputObj first, + # and if that fails, try to coerce it into a list of the arg type + try: + return coerce_sequence(list, obj, pattern_args) + except TypeError as e1: + try: + return [expand_and_coerce(obj, pattern_args)] + except TypeError as e2: + raise TypeError( + f"Could not coerce object ({obj!r}) to MultiInputObj[{pattern_args[0]}] " + f"either as sequence of {pattern_args[0]} ({e1}) or a single {pattern_args[0]} " + f"object to be wrapped in a list {e2}" + ) from e2 + def coerce_mapping( obj: ty.Mapping, type_: ty.Type[ty.Mapping], pattern_args: list ): @@ -407,26 +424,7 @@ def coerce_obj(obj, type_): f"Cannot coerce {obj!r} into {type_}{msg}{self.label_str}" ) from e - try: - return expand_and_coerce(object_, self.pattern) - except TypeError as e: - # Defial handling for MultiInputObjects (which are annoying) - if isinstance(self.pattern, tuple) and self.pattern[0] == MultiInputObj: - # Attempt to coerce the object into arg type of the MultiInputObj first, - # and if that fails, try to coerce it into a list of the arg type - inner_type_parser = copy(self) - inner_type_parser.pattern = self.pattern[1][0] - try: - return [inner_type_parser.coerce(object_)] - except TypeError: - add_exc_note( - e, - "Also failed to coerce to the arg-type of the MultiInputObj " - f"({self.pattern[1][0]})", - ) - raise e - else: - raise e + return expand_and_coerce(object_, self.pattern) def check_type(self, type_: ty.Type[ty.Any]): """Checks the given 
type to see whether it matches or is a subtype of the @@ -589,7 +587,7 @@ def check_sequence(tp_args, pattern_args): try: return expand_and_check(type_, self.pattern) except TypeError as e: - # Defial handling for MultiInputObjects (which are annoying) + # Special handling for MultiInputObjects (which are annoying) if not isinstance(self.pattern, tuple) or self.pattern[0] != MultiInputObj: raise e # Attempt to coerce the object into arg type of the MultiInputObj first, From d056d53d0bdc9daa67a6d24e587704b58803cd9f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 18:52:05 +1100 Subject: [PATCH 196/342] fixing support for multInputObjs --- pydra/design/base.py | 2 +- pydra/design/shell.py | 26 ++++++++----- pydra/engine/core.py | 2 +- pydra/engine/specs.py | 85 +++++++++++++++++++++++-------------------- 4 files changed, 64 insertions(+), 51 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index a8893ab039..79c4eaf2b2 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -833,7 +833,7 @@ def extract_function_inputs_and_outputs( if isinstance(inpt, arg_type): if inpt.default is EMPTY: inpt.default = default - elif inspect.isclass(inpt): + elif inspect.isclass(inpt) or ty.get_origin(inpt): inputs[inpt_name] = arg_type(type=inpt, default=default) else: raise ValueError( diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 6fe38e302d..560447de1f 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -100,17 +100,25 @@ class arg(Arg): formatter: ty.Callable | None = None @sep.validator - def _validate_sep(self, attribute, value): - if ( - value is not None - and self.type is not ty.Any - and ty.get_origin(self.type) is not MultiInputObj - ): - tp = ty.get_origin(self.type) or self.type - if not issubclass(tp, ty.Iterable): + def _validate_sep(self, _, sep): + if self.type is ty.Any: + return + if ty.get_origin(self.type) is MultiInputObj: + tp = ty.get_args(self.type)[0] + else: + tp = self.type + origin = ty.get_origin(tp) or tp + if inspect.isclass(origin) and issubclass(origin, ty.Iterable): + if sep is None: raise ValueError( - f"sep ({value!r}) can only be provided when type is iterable" + f"A value to 'sep' must be provided when type is iterable {tp} " + f"for field {self.name!r}" ) + elif sep is not None: + raise ValueError( + f"sep ({sep!r}) can only be provided when type is iterable {tp} " + f"for field {self.name!r}" + ) @attrs.define(kw_only=True) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 644beb6ab4..4a2daadc71 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -554,7 +554,7 @@ def _check_for_hash_changes(self): f"- {changed}: the {field_type} object passed to the {field.type}" f"field appears to have an unstable hash. This could be due to " "a stochastic/non-thread-safe attribute(s) of the object\n\n" - f"The {field.type}.__bytes_repr__() method can be implemented to " + f'A "bytes_repr" method for {field.type!r} can be implemented to ' "bespoke hashing methods based only on the stable attributes for " f"the `{field_type.__module__}.{field_type.__name__}` type. " f"See pydra/utils/hash.py for examples. Value: {val}\n" diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 739a58b02a..cfab2b9243 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -31,7 +31,7 @@ from . import helpers_state as hlpst from . 
import lazy from pydra.utils.hash import hash_function, Cache -from pydra.utils.typing import StateArray +from pydra.utils.typing import StateArray, MultiInputObj from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY from pydra.design import shell from pydra.engine.lazy import LazyInField, LazyOutField @@ -1126,47 +1126,50 @@ def _command_pos_args( # if False, nothing is added to the command. if value is True: cmd_add.append(field.argstr) + elif ty.get_origin(field.type) is MultiInputObj: + # if the field is MultiInputObj, it is used to create a list of arguments + for val in value or []: + cmd_add += self._format_arg(field, val) else: - if ( - field.argstr.endswith("...") - and isinstance(value, ty.Iterable) - and not isinstance(value, (str, bytes)) - ): - field.argstr = field.argstr.replace("...", "") - # if argstr has a more complex form, with "{input_field}" - if "{" in field.argstr and "}" in field.argstr: - argstr_formatted_l = [] - for val in value: - argstr_f = argstr_formatting( - field.argstr, self, value_updates={field.name: val} - ) - argstr_formatted_l.append(f" {argstr_f}") - cmd_el_str = field.sep.join(argstr_formatted_l) - else: # argstr has a simple form, e.g. "-f", or "--f" - cmd_el_str = field.sep.join( - [f" {field.argstr} {val}" for val in value] - ) - else: - # in case there are ... when input is not a list - field.argstr = field.argstr.replace("...", "") - if isinstance(value, ty.Iterable) and not isinstance( - value, (str, bytes) - ): - cmd_el_str = field.sep.join([str(val) for val in value]) - value = cmd_el_str - # if argstr has a more complex form, with "{input_field}" - if "{" in field.argstr and "}" in field.argstr: - cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) - cmd_el_str = argstr_formatting(cmd_el_str, self.definition) - else: # argstr has a simple form, e.g. "-f", or "--f" - if value: - cmd_el_str = f"{field.argstr} {value}" - else: - cmd_el_str = "" - if cmd_el_str: - cmd_add += split_cmd(cmd_el_str) + cmd_add += self._format_arg(field, value) return field.position, cmd_add + def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: + """Returning arguments used to specify the command args for a single inputs""" + if ( + field.argstr.endswith("...") + and isinstance(value, ty.Iterable) + and not isinstance(value, (str, bytes)) + ): + field.argstr = field.argstr.replace("...", "") + # if argstr has a more complex form, with "{input_field}" + if "{" in field.argstr and "}" in field.argstr: + argstr_formatted_l = [] + for val in value: + argstr_f = argstr_formatting( + field.argstr, self, value_updates={field.name: val} + ) + argstr_formatted_l.append(f" {argstr_f}") + cmd_el_str = field.sep.join(argstr_formatted_l) + else: # argstr has a simple form, e.g. "-f", or "--f" + cmd_el_str = field.sep.join([f" {field.argstr} {val}" for val in value]) + else: + # in case there are ... when input is not a list + field.argstr = field.argstr.replace("...", "") + if isinstance(value, ty.Iterable) and not isinstance(value, (str, bytes)): + cmd_el_str = field.sep.join([str(val) for val in value]) + value = cmd_el_str + # if argstr has a more complex form, with "{input_field}" + if "{" in field.argstr and "}" in field.argstr: + cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) + cmd_el_str = argstr_formatting(cmd_el_str, self) + else: # argstr has a simple form, e.g. 
"-f", or "--f" + if value: + cmd_el_str = f"{field.argstr} {value}" + else: + cmd_el_str = "" + return split_cmd(cmd_el_str) + def _get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: """Return bindings necessary to run task in an alternative root. @@ -1259,7 +1262,7 @@ def reset(self): setattr(self, val, donothing) -def split_cmd(cmd: str): +def split_cmd(cmd: str | None): """Splits a shell command line into separate arguments respecting quotes Parameters @@ -1272,6 +1275,8 @@ def split_cmd(cmd: str): str the command line string split into process args """ + if cmd is None: + return [] # Check whether running on posix or Windows system on_posix = platform.system() != "Windows" args = shlex.split(cmd, posix=on_posix) From f0fbb66a87ea9a1ba2c090d8ff52644f4df1b6b7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 20:05:21 +1100 Subject: [PATCH 197/342] handle the formatting of optional types in command_pos_args --- pydra/engine/specs.py | 9 +++++---- pydra/utils/typing.py | 9 +++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index cfab2b9243..fec01a0712 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -17,7 +17,7 @@ import cloudpickle as cp from fileformats.core import FileSet from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils.typing import TypeParser +from pydra.utils.typing import TypeParser, is_optional, non_optional_type from .helpers import ( attrs_fields, attrs_values, @@ -1100,6 +1100,7 @@ def _command_pos_args( cmd_add = [] # formatter that creates a custom command argument # it can take the value of the field, all inputs, or the value of other fields. + tp = non_optional_type(field.type) if is_optional(field.type) else field.type if field.formatter: call_args = inspect.getfullargspec(field.formatter) call_args_val = {} @@ -1121,12 +1122,12 @@ def _command_pos_args( cmd_el_str = cmd_el_str.strip().replace(" ", " ") if cmd_el_str != "": cmd_add += split_cmd(cmd_el_str) - elif field.type is bool and "{" not in field.argstr: + elif tp is bool and "{" not in field.argstr: # if value is simply True the original argstr is used, # if False, nothing is added to the command. 
if value is True: cmd_add.append(field.argstr) - elif ty.get_origin(field.type) is MultiInputObj: + elif ty.get_origin(tp) is MultiInputObj: # if the field is MultiInputObj, it is used to create a list of arguments for val in value or []: cmd_add += self._format_arg(field, val) @@ -1134,7 +1135,7 @@ def _command_pos_args( cmd_add += self._format_arg(field, value) return field.position, cmd_add - def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: + def _format_arg(self, field: shell.arg, value: ty.Any, tp: type) -> list[str]: """Returning arguments used to specify the command args for a single inputs""" if ( field.argstr.endswith("...") diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index d24d66fa43..fc0a1f3a44 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1042,6 +1042,15 @@ def is_optional(type_: type) -> bool: return False +def non_optional_type(type_: type) -> type: + if is_optional(type_): + non_optional = [a for a in ty.get_args(type_) if a is not type(None)] + if len(non_optional) == 1: + return non_optional[0] + return ty.Union[tuple(non_optional)] + return type_ + + def is_fileset_or_union(type_: type) -> bool: """Check if the type is a FileSet or a Union containing a FileSet""" if is_union(type_): From 783656e06647c8e4dadc4d51602f1c3443217f8c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 20:20:13 +1100 Subject: [PATCH 198/342] handle previous errors in update_status --- pydra/engine/submitter.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index b1bb46c49d..0983d5bbbc 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -525,9 +525,16 @@ def update_status(self) -> None: return # Check to see if any previously queued tasks have completed for index, task in list(self.queued.items()): - if task.done: + try: + is_done = task.done + except ValueError: + errored = True + is_done = False + else: + errored = False + if is_done: self.successful[task.state_index] = self.queued.pop(index) - elif task.errored: + elif task.errored or errored: self.errored[task.state_index] = self.queued.pop(index) elif task.run_start_time: self.running[task.state_index] = ( From f2175025616b5eaa8add6204c749bf0942952dad Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Feb 2025 20:20:44 +1100 Subject: [PATCH 199/342] removed erroneous signature in format_arg --- pydra/engine/specs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index fec01a0712..e9f812ea3c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1135,7 +1135,7 @@ def _command_pos_args( cmd_add += self._format_arg(field, value) return field.position, cmd_add - def _format_arg(self, field: shell.arg, value: ty.Any, tp: type) -> list[str]: + def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: """Returning arguments used to specify the command args for a single inputs""" if ( field.argstr.endswith("...") From 8969b3f235e81c95e6cbccdbc953bdc05988e509 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 17:16:29 +1100 Subject: [PATCH 200/342] upped test dependency to alpha versions with updated syntax --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0f6a4cc75a..768e9cdb19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,8 +53,8 @@ doc = [ "pandoc", "numpy", "scipy", - 
"fileformats-medimage", - # "pydra-mrtrix3", + "fileformats-medimage >= v0.10.0a", + "pydra-mrtrix3 >=3.0.4a14", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From 5ee9c681dea34251c7872ce730ea34aefdec05fe Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 17:17:34 +1100 Subject: [PATCH 201/342] debugging test_tasks --- pydra/design/shell.py | 6 +++++- pydra/engine/core.py | 2 +- pydra/engine/specs.py | 2 +- pydra/engine/submitter.py | 4 ++-- pydra/engine/tests/test_task.py | 22 ++++++++-------------- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 560447de1f..c27c5e3d30 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -108,7 +108,11 @@ def _validate_sep(self, _, sep): else: tp = self.type origin = ty.get_origin(tp) or tp - if inspect.isclass(origin) and issubclass(origin, ty.Iterable): + if ( + inspect.isclass(origin) + and issubclass(origin, ty.Sequence) + and tp is not str + ): if sep is None: raise ValueError( f"A value to 'sep' must be provided when type is iterable {tp} " diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 4a2daadc71..3fa73ce8ee 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -457,7 +457,7 @@ def done(self): if _result: if _result.errored: self._errored = True - raise ValueError(f"Task {self.name} raised an error") + raise ValueError(f"Task {self.name!r} raised an error") else: return True return False diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index e9f812ea3c..4b495c883c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -748,7 +748,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: if not err_files: raise raise ValueError( - f"Task {lazy_field._node.name} raised an error, full crash report is " + f"Task {lazy_field._node.name!r} raised an error, full crash report is " f"here: " + ( str(err_files[0]) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 0983d5bbbc..c460239d2e 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -491,13 +491,13 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def task(self, index: StateIndex | None = None) -> "Task | list[Task[DefType]]": + def task(self, index: StateIndex = StateIndex()) -> "Task | list[Task[DefType]]": """Get a task object for a given state index.""" self.tasks # Ensure tasks are loaded try: return self._tasks[index] except KeyError: - if index is None: + if not index: return StateArray(self._tasks.values()) raise diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index efa993ab6a..3514b64dd9 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1351,14 +1351,8 @@ def test_traceback(tmpdir): def FunError(x): raise Exception("Error from the function") - task = Task( - name="error", - definition=FunError().split("x", x=[3, 4]), - submitter=Submitter(cache_dir=tmpdir), - ) - - with pytest.raises(Exception, match="from the function") as exinfo: - task.run() + with pytest.raises(Exception, match="Task 'FunError' raised an error") as exinfo: + FunError(x=3)(worker="cf", cache_dir=tmpdir) # getting error file from the error message error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] @@ -1383,12 +1377,12 @@ def FunError(x): raise Exception("Error from the function") @workflow.define - def Workflow(x): - error = 
workflow.add(FunError(x=x), name="error") - return error.out + def Workflow(x_list): + fun_error = workflow.add(FunError().split(x=x_list), name="fun_error") + return fun_error.out - wf = Workflow().split("x", x=[3, 4]) - with pytest.raises(Exception, match="Task error raised an error") as exinfo: + wf = Workflow(x_list=[3, 4]) + with pytest.raises(Exception, match="Task 'fun_error' raised an error") as exinfo: wf(worker="cf") # getting error file from the error message @@ -1463,7 +1457,7 @@ class Defn(ShellDef["Defn.Outputs"]): a1_field: str b2_field: float c3_field: ty.Dict[str, str] - d4_field: ty.List[str] + d4_field: ty.List[str] = shell.arg(sep=" ") executable = "dummy" class Outputs(ShellOutputs): From a368c40bb4433fb0bc9f0881cc59f1a8e59a36d5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 17:19:00 +1100 Subject: [PATCH 202/342] updated tutorial deps --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 768e9cdb19..bcbb51179e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,6 @@ test = [ "pympler", ] tutorial = [ - "fileformats-medimage-extras", "jupyter", "jupyter_contrib_nbextensions", "jupytext", @@ -92,7 +91,9 @@ tutorial = [ "nilearn", "numpy", "pandas", - "pydra-mrtrix3", + "fileformats-medimage >= v0.10.0a", + "fileformats-medimage-extras >= v0.10.0a", + "pydra-mrtrix3 >=3.0.4a14", "psutil", "pytest", "scipy", From 713939bb87250946aac582cb908c89219bdbe1d9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 18:44:52 +1100 Subject: [PATCH 203/342] debugged test_tasks --- pydra/engine/core.py | 11 ++++- pydra/engine/specs.py | 37 +++++++++++---- pydra/engine/submitter.py | 21 ++++++++- pydra/engine/tests/test_task.py | 82 +++++++++++++-------------------- 4 files changed, 88 insertions(+), 63 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 3fa73ce8ee..ab9e98bbf2 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -100,6 +100,10 @@ def __init__( name: str, environment: "Environment | None" = None, state_index: "state.StateIndex | None" = None, + pre_run: ty.Callable["Task", None] | None = None, + pre_run_task: ty.Callable["Task", None] | None = None, + post_run_task: ty.Callable["Task", None] | None = None, + post_run: ty.Callable["Task", None] | None = None, ): """ Initialize a task. 
@@ -142,7 +146,12 @@ def __init__( self.allow_cache_override = True self._checksum = None self._uid = uuid4().hex - self.hooks = TaskHook() + self.hooks = TaskHook( + pre_run=pre_run, + post_run=post_run, + pre_run_task=pre_run_task, + post_run_task=post_run_task, + ) self._errored = False self._lzout = None diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 4b495c883c..600f1f3c04 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -14,6 +14,7 @@ from copy import deepcopy from typing import Self import attrs +from attrs.converters import default_if_none import cloudpickle as cp from fileformats.core import FileSet from pydra.utils.messenger import AuditFlag, Messenger @@ -160,6 +161,10 @@ def __call__( messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, name: str | None = None, + pre_run: ty.Callable["Task", None] | None = None, + post_run: ty.Callable["Task", None] | None = None, + pre_run_task: ty.Callable["Task", None] | None = None, + post_run_task: ty.Callable["Task", None] | None = None, **kwargs: ty.Any, ) -> OutputsType: """Create a task from this definition and execute it to produce a result. @@ -212,8 +217,17 @@ def __call__( worker=worker, **kwargs, ) as sub: - result = sub(self, name=name) + result = sub( + self, + name=name, + pre_run=pre_run, + post_run=post_run, + pre_run_task=pre_run_task, + post_run_task=post_run_task, + ) except TypeError as e: + # Catch any inadvertent passing of task definition parameters to the + # execution call if hasattr(e, "__notes__") and WORKER_KWARG_FAIL_NOTE in e.__notes__: if match := re.match( r".*got an unexpected keyword argument '(\w+)'", str(e) @@ -1248,15 +1262,18 @@ def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: class TaskHook: """Callable task hooks.""" - pre_run_task: ty.Callable = donothing - post_run_task: ty.Callable = donothing - pre_run: ty.Callable = donothing - post_run: ty.Callable = donothing - - def __setattr__(self, attr, val): - if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - raise AttributeError("Cannot set unknown hook") - super().__setattr__(attr, val) + pre_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + pre_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) def reset(self): for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index c460239d2e..bf89e97bab 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -169,6 +169,10 @@ def __call__( self, task_def: "TaskDef", name: str | None = "task", + pre_run: ty.Callable["Task", None] | None = None, + post_run: ty.Callable["Task", None] | None = None, + pre_run_task: ty.Callable["Task", None] | None = None, + post_run_task: ty.Callable["Task", None] | None = None, ): """Submitter run function.""" @@ -194,7 +198,16 @@ def Split(defn: TaskDef, output_types: dict): f"Task {self} is marked for combining, but not splitting. " "Use the `split` method to split the task before combining." 
) - task = Task(task_def, submitter=self, name=name, environment=self.environment) + task = Task( + task_def, + submitter=self, + name=name, + environment=self.environment, + pre_run=pre_run, + post_run=post_run, + pre_run_task=pre_run_task, + post_run_task=post_run_task, + ) try: self.run_start_time = datetime.now() if self.worker.is_async: # Only workflow tasks can be async @@ -203,6 +216,12 @@ def Split(defn: TaskDef, output_types: dict): ) else: self.worker.run(task, rerun=self.rerun) + except Exception as e: + e.add_note( + f"Full crash report for {type(task_def).__name__!r} task is here: " + + str(task.output_dir / "_error.pklz") + ) + raise e finally: self.run_start_time = None PersistentCache().clean_up() diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 3514b64dd9..8ac0ec5158 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1198,7 +1198,7 @@ def test_taskhooks_1(tmpdir: Path, capsys): cache_dir.mkdir() foo = Task( - definition=FunAddTwo(a=1), submitter=Submitter(cache_dir=cache_dir), name="foo" + definition=FunAddTwo(a=1), submitter=Submitter(cache_dir=tmpdir), name="foo" ) assert foo.hooks # ensure all hooks are defined @@ -1209,8 +1209,7 @@ def test_taskhooks_1(tmpdir: Path, capsys): def myhook(task, *args): print("I was called") - foo.hooks.pre_run = myhook - foo.run() + FunAddTwo(a=1)(cache_dir=cache_dir, pre_run=myhook) captured = capsys.readouterr() assert "I was called\n" in captured.out del captured @@ -1219,52 +1218,31 @@ def myhook(task, *args): with pytest.raises(AttributeError): foo.hooks.mid_run = myhook + # reset all hooks + foo.hooks.reset() + for attr in ("pre_run", "post_run", "pre_run_task", "post_run_task"): + hook = getattr(foo.hooks, attr) + assert hook() is None + # clear cache shutil.rmtree(cache_dir) cache_dir.mkdir() # set all hooks - foo.hooks.post_run = myhook - foo.hooks.pre_run_task = myhook - foo.hooks.post_run_task = myhook - foo.run() - captured = capsys.readouterr() - assert captured.out.count("I was called\n") == 4 - del captured - - # hooks are independent across tasks by default - bar = Task( - definition=FunAddTwo(a=3), name="bar", submitter=Submitter(cache_dir=tmpdir) + FunAddTwo(a=1)( + cache_dir=cache_dir, + pre_run=myhook, + post_run=myhook, + pre_run_task=myhook, + post_run_task=myhook, ) - assert bar.hooks is not foo.hooks - # but can be shared across tasks - bar.hooks = foo.hooks - # and workflows - wf_task = Task( - definition=BasicWorkflow(x=1), - submitter=Submitter(cache_dir=tmpdir, worker="cf"), - name="wf", - ) - wf_task.hooks = bar.hooks - assert foo.hooks == bar.hooks == wf_task.hooks - - wf_task.run() captured = capsys.readouterr() assert captured.out.count("I was called\n") == 4 del captured - # reset all hooks - foo.hooks.reset() - for attr in ("pre_run", "post_run", "pre_run_task", "post_run_task"): - hook = getattr(foo.hooks, attr) - assert hook() is None - def test_taskhooks_2(tmpdir, capsys): """checking order of the hooks; using task's attributes""" - foo = Task( - definition=FunAddTwo(a=1), name="foo", submitter=Submitter(cache_dir=tmpdir) - ) def myhook_prerun(task, *args): print(f"i. prerun hook was called from {task.name}") @@ -1278,11 +1256,13 @@ def myhook_postrun_task(task, *args): def myhook_postrun(task, *args): print(f"iv. 
postrun hook was called {task.name}") - foo.hooks.pre_run = myhook_prerun - foo.hooks.post_run = myhook_postrun - foo.hooks.pre_run_task = myhook_prerun_task - foo.hooks.post_run_task = myhook_postrun_task - foo.run() + FunAddTwo(a=1)( + cache_dir=tmpdir, + pre_run=myhook_prerun, + post_run=myhook_postrun, + pre_run_task=myhook_prerun_task, + post_run_task=myhook_postrun_task, + ) captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1318,9 +1298,6 @@ def myhook_postrun(task, result, *args): def test_taskhooks_4(tmpdir, capsys): """task raises an error: postrun task should be called, postrun shouldn't be called""" - foo = Task( - definition=FunAddTwo(a="one"), name="foo", submitter=Submitter(cache_dir=tmpdir) - ) def myhook_postrun_task(task, result, *args): print(f"postrun task hook was called, result object is {result}") @@ -1328,11 +1305,10 @@ def myhook_postrun_task(task, result, *args): def myhook_postrun(task, result, *args): print("postrun hook should not be called") - foo.hooks.post_run = myhook_postrun - foo.hooks.post_run_task = myhook_postrun_task - with pytest.raises(Exception): - foo() + FunAddTwo(a="one")( + cache_dir=tmpdir, post_run=myhook_postrun, post_run_task=myhook_postrun_task + ) captured = capsys.readouterr() hook_messages = captured.out.strip().split("\n") @@ -1351,11 +1327,13 @@ def test_traceback(tmpdir): def FunError(x): raise Exception("Error from the function") - with pytest.raises(Exception, match="Task 'FunError' raised an error") as exinfo: + with pytest.raises(Exception, match="Error from the function") as exinfo: FunError(x=3)(worker="cf", cache_dir=tmpdir) # getting error file from the error message - error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] + error_file_match = ( + str(exinfo.value.__notes__[0]).split("here: ")[-1].split("_error.pklz")[0] + ) error_file = Path(error_file_match) / "_error.pklz" # checking if the file exists assert error_file.exists() @@ -1386,7 +1364,9 @@ def Workflow(x_list): wf(worker="cf") # getting error file from the error message - error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] + error_file_match = ( + str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0].strip() + ) error_file = Path(error_file_match) / "_error.pklz" # checking if the file exists assert error_file.exists() From d9ce7076758ad3dbca3172080a0f3f1a550255c0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 18:47:03 +1100 Subject: [PATCH 204/342] added fileformats-extras to doc dependencies --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bcbb51179e..e1ebd87ae6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.15a1", + "fileformats >=0.15a3", "platformdirs >=2", ] license = { file = "LICENSE" } @@ -53,6 +53,7 @@ doc = [ "pandoc", "numpy", "scipy", + "fileformats-extras >= v0.15.0a3", "fileformats-medimage >= v0.10.0a", "pydra-mrtrix3 >=3.0.4a14", "sphinx_rtd_theme", From 1c0f42cca3eee61306e733566346585f6e4e4c1a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 18:47:32 +1100 Subject: [PATCH 205/342] added fileformats-extras to tutorial deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index e1ebd87ae6..d106c0dd97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,7 @@ tutorial = 
[ "nilearn", "numpy", "pandas", + "fileformats-extras >= v0.15.0a3", "fileformats-medimage >= v0.10.0a", "fileformats-medimage-extras >= v0.10.0a", "pydra-mrtrix3 >=3.0.4a14", From 7f4dc31157fd2b2cbab09128a3edcc6fa1869bf9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 18:51:13 +1100 Subject: [PATCH 206/342] dropped py 3.10 from ci matrices --- .github/workflows/testdask.yml | 2 +- .github/workflows/testpydra.yml | 2 +- .github/workflows/testsingularity.yml | 2 +- .github/workflows/testslurm.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml index 55cc0e665a..169e562fe6 100644 --- a/.github/workflows/testdask.yml +++ b/.github/workflows/testdask.yml @@ -18,7 +18,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest] - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] fail-fast: false runs-on: ${{ matrix.os }} diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index db4d61f046..79d9a4b10c 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -50,7 +50,7 @@ jobs: strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] install: ['wheel'] include: - os: 'ubuntu-latest' diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index f0d864a9e1..97dd951aea 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] fail-fast: False steps: diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index a518c10951..d067a5155f 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -14,7 +14,7 @@ jobs: build: strategy: matrix: - python-version: [3.10.9, 3.11.5] + python-version: [3.11.5] fail-fast: false runs-on: ubuntu-latest env: From 7352a1788d1882e0527652405bd5849ed8a7d920 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 20:11:53 +1100 Subject: [PATCH 207/342] debugged test_state --- pydra/design/shell.py | 2 -- pydra/engine/tests/test_specs.py | 5 --- pydra/engine/tests/test_state.py | 56 ++++++++++++++++++++++++-------- 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index c27c5e3d30..cecf61e59e 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -623,8 +623,6 @@ def from_type_str(type_str) -> type: if ext_type.ext is not None: path_template = name + ext_type.ext kwds["path_template"] = path_template - if ty.get_origin(type_) is MultiInputObj: - kwds["sep"] = " " if option is None: add_arg(name, field_type, kwds) else: diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 07b4b18909..90bfeb004f 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -27,11 +27,6 @@ make_klass = lambda x: x -def test_basespec(): - definition = BaseDef() - assert definition.hash == "0b1d98df22ecd1733562711c205abca2" - - def test_runtime(): runtime = Runtime() assert hasattr(runtime, "rss_peak_gb") diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index 8339bdc2e6..47a695adf1 100644 --- a/pydra/engine/tests/test_state.py +++ 
b/pydra/engine/tests/test_state.py @@ -99,25 +99,27 @@ def test_state_1( def test_state_2_err(): with pytest.raises(PydraStateError) as exinfo: - State("NA", splitter={"a"}) + State(definition=example_def, name="NA", splitter={"a"}) assert "splitter has to be a string, a tuple or a list" == str(exinfo.value) def test_state_3_err(): with pytest.raises(PydraStateError) as exinfo: - State("NA", splitter=["a", "b"], combiner=("a", "b")) + State( + definition=example_def, name="NA", splitter=["a", "b"], combiner=("a", "b") + ) assert "combiner has to be a string or a list" == str(exinfo.value) def test_state_4_err(): - st = State("NA", splitter="a", combiner=["a", "b"]) + st = State(definition=example_def, name="NA", splitter="a", combiner=["a", "b"]) with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "all combiners have to be in the splitter" in str(exinfo.value) def test_state_5_err(): - st = State("NA", combiner="a") + st = State(definition=example_def, name="NA", combiner="a") with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "splitter has to be set before" in str(exinfo.value) @@ -335,7 +337,7 @@ def test_state_6(splitter, cont_dim, values, keys, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(splitter=splitter, name="S") + st = State(definition=example_def, splitter=splitter, name="S") st.prepare_states(inputs=inputs, cont_dim=cont_dim) # checking keys and splits @@ -371,7 +373,7 @@ def test_state_7(splitter, cont_dim, inputs, mismatch): cont_dim = {f"S.{k}": v for k, v in cont_dim.items()} inputs = {f"S.{k}": v for k, v in inputs.items()} - st = State(splitter=splitter, name="S") + st = State(definition=example_def, splitter=splitter, name="S") if mismatch: with pytest.raises(ValueError): @@ -421,7 +423,7 @@ def test_state_8(splitter, cont_dim, values, keys, shapes, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(splitter=splitter, name="S") + st = State(definition=example_def, splitter=splitter, name="S") st.prepare_states(inputs=inputs, cont_dim=cont_dim) # checking keys and splits @@ -462,7 +464,7 @@ def test_state_9(splitter, values, keys, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(splitter=splitter, name="S") + st = State(definition=example_def, splitter=splitter, name="S") st.prepare_states(inputs=inputs) # checking keys and splits @@ -1505,7 +1507,10 @@ def test_state_connect_innerspl_4(): st1 = State(definition=example_def, name="NA", splitter="a") st2 = State(definition=example_def, name="NB", splitter=["b", "c"]) st3 = State( - name="NC", splitter="d", other_states={"NA": (st1, "e"), "NB": (st2, "f")} + definition=example_def, + name="NC", + splitter="d", + other_states={"NA": (st1, "e"), "NB": (st2, "f")}, ) assert st3.splitter == [["_NA", "_NB"], "NC.d"] @@ -1789,7 +1794,11 @@ def test_state_connect_innerspl_combine_1(): only current part provided - the prev-state part had to be added""" st1 = State(definition=example_def, name="NA", splitter="a") st2 = State( - name="NB", splitter=["c", "b"], combiner=["b"], other_states={"NA": (st1, "b")} + definition=example_def, + name="NB", + splitter=["c", "b"], + combiner=["b"], + other_states={"NA": (st1, "b")}, ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] @@ -1870,7 +1879,11 @@ def test_state_connect_innerspl_combine_2(): """ st1 = 
State(definition=example_def, name="NA", splitter="a") st2 = State( - name="NB", splitter=["c", "b"], combiner=["c"], other_states={"NA": (st1, "b")} + definition=example_def, + name="NB", + splitter=["c", "b"], + combiner=["c"], + other_states={"NA": (st1, "b")}, ) assert st2.splitter == ["_NA", ["NB.c", "NB.b"]] @@ -2064,6 +2077,7 @@ def test_state_connect_combine_prevst_4(): st1 = State(definition=example_def, name="NA", splitter="a") st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( + definition=example_def, name="NC", splitter=["_NA", "_NB"], combiner=["NA.a"], @@ -2116,6 +2130,7 @@ def test_state_connect_combine_prevst_5(): st1 = State(definition=example_def, name="NA", splitter="a") st2 = State(definition=example_def, name="NB", splitter="a") st3 = State( + definition=example_def, name="NC", splitter=("_NA", "_NB"), combiner=["NA.a"], @@ -2149,7 +2164,11 @@ def test_state_connect_combine_prevst_6(): """ st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) st2 = State( - name="NB", splitter="c", other_states={"NA": (st1, "b")}, combiner="NA.a" + definition=example_def, + name="NB", + splitter="c", + other_states={"NA": (st1, "b")}, + combiner="NA.a", ) assert st2.splitter == ["_NA", "NB.c"] assert st2.splitter_rpn == ["NA.a", "NA.b", "*", "NB.c", "*"] @@ -2277,7 +2296,9 @@ def function(self): def test_connect_splitters( splitter, other_states, expected_splitter, expected_prevst, expected_current ): - st = State(name="CN", splitter=splitter, other_states=other_states) + st = State( + definition=example_def, name="CN", splitter=splitter, other_states=other_states + ) st.set_input_groups() assert st.splitter == expected_splitter assert st.prev_state_splitter == expected_prevst @@ -2306,13 +2327,19 @@ def test_connect_splitters( ) def test_connect_splitters_exception_1(splitter, other_states): with pytest.raises(PydraStateError) as excinfo: - State(name="CN", splitter=splitter, other_states=other_states) + State( + definition=example_def, + name="CN", + splitter=splitter, + other_states=other_states, + ) assert "prev-state and current splitters are mixed" in str(excinfo.value) def test_connect_splitters_exception_2(): with pytest.raises(PydraStateError) as excinfo: st = State( + definition=example_def, name="CN", splitter="_NB", other_states={ @@ -2326,6 +2353,7 @@ def test_connect_splitters_exception_2(): def test_connect_splitters_exception_3(): with pytest.raises(PydraStateError) as excinfo: State( + definition=example_def, name="CN", splitter="_NB", other_states=[ From 7c97e55bfcb26923a8105c87f0c4df57842cde84 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 20:41:21 +1100 Subject: [PATCH 208/342] added mrtrix to docs build ci --- .github/workflows/docs.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index fe11e29b3a..0200363201 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,6 +19,15 @@ jobs: build: runs-on: ubuntu-latest steps: + - name: Install Minconda + uses: conda-incubator/setup-miniconda@v2 + with: + auto-activate-base: true + activate-environment: "" + - name: Install MRtrix via Conda + run: | + conda install -c mrtrix3 mrtrix3 + mrconvert --version - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow From d6ae5ea16cfacf2847efc588e31f4ba15dcaf5bb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 20:52:41 +1100 Subject: [PATCH 209/342] moved hooks arguments into single TaskHooks 
object in run methods --- pydra/engine/core.py | 14 ++------ pydra/engine/specs.py | 62 +++++++++++++++------------------ pydra/engine/submitter.py | 12 ++----- pydra/engine/tests/test_task.py | 28 ++++++++------- 4 files changed, 50 insertions(+), 66 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index ab9e98bbf2..6e33972e17 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -29,7 +29,7 @@ from .specs import ( RuntimeSpec, Result, - TaskHook, + TaskHooks, ) from .helpers import ( attrs_fields, @@ -100,10 +100,7 @@ def __init__( name: str, environment: "Environment | None" = None, state_index: "state.StateIndex | None" = None, - pre_run: ty.Callable["Task", None] | None = None, - pre_run_task: ty.Callable["Task", None] | None = None, - post_run_task: ty.Callable["Task", None] | None = None, - post_run: ty.Callable["Task", None] | None = None, + hooks: TaskHooks | None = None, ): """ Initialize a task. @@ -146,12 +143,7 @@ def __init__( self.allow_cache_override = True self._checksum = None self._uid = uuid4().hex - self.hooks = TaskHook( - pre_run=pre_run, - post_run=post_run, - pre_run_task=pre_run_task, - post_run_task=post_run_task, - ) + self.hooks = hooks if hooks is not None else TaskHooks() self._errored = False self._lzout = None diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 600f1f3c04..146959aada 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -138,6 +138,32 @@ def __eq__(self, other: ty.Any) -> bool: OutputsType = ty.TypeVar("OutputType", bound=TaskOutputs) +def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: + return None + + +@attrs.define(kw_only=True) +class TaskHooks: + """Callable task hooks.""" + + pre_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run_task: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + pre_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + post_run: ty.Callable = attrs.field( + default=donothing, converter=default_if_none(donothing) + ) + + def reset(self): + for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: + setattr(self, val, donothing) + + @attrs.define(kw_only=True, auto_attribs=False, eq=False) class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" @@ -161,10 +187,7 @@ def __call__( messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, name: str | None = None, - pre_run: ty.Callable["Task", None] | None = None, - post_run: ty.Callable["Task", None] | None = None, - pre_run_task: ty.Callable["Task", None] | None = None, - post_run_task: ty.Callable["Task", None] | None = None, + hooks: TaskHooks | None = None, **kwargs: ty.Any, ) -> OutputsType: """Create a task from this definition and execute it to produce a result. 
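With this change the four hooks are no longer set as attributes on an existing Task instance; they are bundled into a single TaskHooks object that is passed through TaskDef.__call__ and the Submitter down to the Task. A minimal sketch of the reworked usage, mirroring the updated tests further below (FunAddTwo is the test task used there, myhook is a user callback, and cache_dir is assumed to be an existing directory):

    from pydra.engine.specs import TaskHooks

    def myhook(task, *args):
        # called with the Task object before the task is executed
        print(f"pre-run hook called for {task.name}")

    # hooks are now supplied at call time rather than assigned to task.hooks
    outputs = FunAddTwo(a=1)(cache_dir=cache_dir, hooks=TaskHooks(pre_run=myhook))
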
@@ -220,10 +243,7 @@ def __call__( result = sub( self, name=name, - pre_run=pre_run, - post_run=post_run, - pre_run_task=pre_run_task, - post_run_task=post_run_task, + hooks=hooks, ) except TypeError as e: # Catch any inadvertent passing of task definition parameters to the @@ -1254,32 +1274,6 @@ def _generated_output_names(self, stdout: str, stderr: str): DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent -def donothing(*args: ty.Any, **kwargs: ty.Any) -> None: - return None - - -@attrs.define(kw_only=True) -class TaskHook: - """Callable task hooks.""" - - pre_run_task: ty.Callable = attrs.field( - default=donothing, converter=default_if_none(donothing) - ) - post_run_task: ty.Callable = attrs.field( - default=donothing, converter=default_if_none(donothing) - ) - pre_run: ty.Callable = attrs.field( - default=donothing, converter=default_if_none(donothing) - ) - post_run: ty.Callable = attrs.field( - default=donothing, converter=default_if_none(donothing) - ) - - def reset(self): - for val in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: - setattr(self, val, donothing) - - def split_cmd(cmd: str | None): """Splits a shell command line into separate arguments respecting quotes diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index bf89e97bab..82ae8d8e9e 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -29,7 +29,7 @@ if ty.TYPE_CHECKING: from .node import Node - from .specs import TaskDef, WorkflowDef + from .specs import TaskDef, WorkflowDef, TaskHooks from .environments import Environment from .state import State @@ -169,10 +169,7 @@ def __call__( self, task_def: "TaskDef", name: str | None = "task", - pre_run: ty.Callable["Task", None] | None = None, - post_run: ty.Callable["Task", None] | None = None, - pre_run_task: ty.Callable["Task", None] | None = None, - post_run_task: ty.Callable["Task", None] | None = None, + hooks: "TaskHooks | None" = None, ): """Submitter run function.""" @@ -203,10 +200,7 @@ def Split(defn: TaskDef, output_types: dict): submitter=self, name=name, environment=self.environment, - pre_run=pre_run, - post_run=post_run, - pre_run_task=pre_run_task, - post_run_task=post_run_task, + hooks=hooks, ) try: self.run_start_time = datetime.now() diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 8ac0ec5158..fcdf1d246c 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -11,11 +11,10 @@ from pydra.design import python, shell, workflow from pydra.utils.messenger import FileMessenger, PrintMessenger, collect_messages from ..task import AuditFlag -from pydra.engine.specs import argstr_formatting, ShellDef, ShellOutputs +from pydra.engine.specs import argstr_formatting, ShellDef, ShellOutputs, TaskHooks from pydra.engine.helpers import list_fields, print_help from pydra.engine.submitter import Submitter from pydra.engine.core import Task -from .utils import BasicWorkflow from pydra.utils import default_run_cache_dir from pydra.utils.typing import ( MultiInputObj, @@ -1209,7 +1208,7 @@ def test_taskhooks_1(tmpdir: Path, capsys): def myhook(task, *args): print("I was called") - FunAddTwo(a=1)(cache_dir=cache_dir, pre_run=myhook) + FunAddTwo(a=1)(cache_dir=cache_dir, hooks=TaskHooks(pre_run=myhook)) captured = capsys.readouterr() assert "I was called\n" in captured.out del captured @@ -1231,10 +1230,12 @@ def myhook(task, *args): # set all hooks FunAddTwo(a=1)( cache_dir=cache_dir, - pre_run=myhook, - post_run=myhook, - pre_run_task=myhook, - 
post_run_task=myhook, + hooks=TaskHooks( + pre_run=myhook, + post_run=myhook, + pre_run_task=myhook, + post_run_task=myhook, + ), ) captured = capsys.readouterr() assert captured.out.count("I was called\n") == 4 @@ -1258,10 +1259,12 @@ def myhook_postrun(task, *args): FunAddTwo(a=1)( cache_dir=tmpdir, - pre_run=myhook_prerun, - post_run=myhook_postrun, - pre_run_task=myhook_prerun_task, - post_run_task=myhook_postrun_task, + hooks=TaskHooks( + pre_run=myhook_prerun, + post_run=myhook_postrun, + pre_run_task=myhook_prerun_task, + post_run_task=myhook_postrun_task, + ), ) captured = capsys.readouterr() @@ -1307,7 +1310,8 @@ def myhook_postrun(task, result, *args): with pytest.raises(Exception): FunAddTwo(a="one")( - cache_dir=tmpdir, post_run=myhook_postrun, post_run_task=myhook_postrun_task + cache_dir=tmpdir, + hooks=TaskHooks(post_run=myhook_postrun, post_run_task=myhook_postrun_task), ) captured = capsys.readouterr() From d5a6587d5f107d1628994ce256d3bd382579c86d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 20:56:23 +1100 Subject: [PATCH 210/342] moved mrtrix install into docs build-new --- .github/workflows/docs.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0200363201..2fab919f96 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -19,15 +19,6 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Install Minconda - uses: conda-incubator/setup-miniconda@v2 - with: - auto-activate-base: true - activate-environment: "" - - name: Install MRtrix via Conda - run: | - conda install -c mrtrix3 mrtrix3 - mrconvert --version - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow @@ -56,6 +47,15 @@ jobs: build-new: runs-on: ubuntu-latest steps: + - name: Install Minconda + uses: conda-incubator/setup-miniconda@v3 + with: + auto-activate-base: true + activate-environment: "" + - name: Install MRtrix via Conda + run: | + conda install -c mrtrix3 mrtrix3 + mrconvert --version - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow From 01d50e2c282b0b41699dfae0abec19cd412da4cf Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Feb 2025 21:01:37 +1100 Subject: [PATCH 211/342] changed order in docs cd --- .github/workflows/docs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2fab919f96..f338d093aa 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -47,6 +47,9 @@ jobs: build-new: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Install Minconda uses: conda-incubator/setup-miniconda@v3 with: @@ -56,9 +59,6 @@ jobs: run: | conda install -c mrtrix3 mrtrix3 mrconvert --version - - uses: actions/checkout@v4 - - name: Fetch tags - run: git fetch --prune --unshallow - name: Set up Python uses: actions/setup-python@v5 with: From 128a3e9bb81a05eaa1256c739630d5adc76fc642 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 13 Feb 2025 21:40:45 +1100 Subject: [PATCH 212/342] added avoid_clashes option to copy_nested_files to ensure names don't clash when copying files with the same name from multiple node outputs --- .../source/tutorial/1-getting-started.ipynb | 15 +++-- new-docs/source/tutorial/tst.py | 58 +++++++++++++------ pydra/engine/helpers_file.py | 16 ++++- 3 files changed, 63 insertions(+), 26 deletions(-) diff 
--git a/new-docs/source/tutorial/1-getting-started.ipynb b/new-docs/source/tutorial/1-getting-started.ipynb index 9bb05264f6..a0d5b1e42b 100644 --- a/new-docs/source/tutorial/1-getting-started.ipynb +++ b/new-docs/source/tutorial/1-getting-started.ipynb @@ -30,6 +30,7 @@ "source": [ "from pathlib import Path\n", "from tempfile import mkdtemp\n", + "from pprint import pprint\n", "import json\n", "\n", "JSON_CONTENTS = {'a': True, 'b': 'two', 'c': 3, 'd': [7, 0.55, 6]}\n", @@ -160,7 +161,7 @@ "outputs = mrgrid()\n", "\n", "# Print the locations of the output files\n", - "print(\"\\n\".join(str(p) for p in outputs.out_file))" + "pprint(outputs.out_file)" ] }, { @@ -184,8 +185,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "mrgrid_varying_vox_sizes = MrGrid(operation=\"regrid\").split(\n", " (\"in_file\", \"voxel\"),\n", " in_file=nifti_dir.iterdir(),\n", @@ -205,7 +204,9 @@ " ],\n", ")\n", "\n", - "print(\"\\n\".join(str(p) for p in outputs.out_file))" + "outputs = mrgrid_varying_vox_sizes()\n", + "\n", + "pprint(outputs.out_file)" ] }, { @@ -277,7 +278,9 @@ "metadata": {}, "outputs": [], "source": [ - "outputs = mrgrid(cache_root=Path(\"~/pydra-cache\").expanduser())" + "outputs = mrgrid(cache_dir=Path(\"~/pydra-cache\").expanduser())\n", + "\n", + "pprint(outputs)" ] }, { @@ -296,7 +299,7 @@ "source": [ "from pydra.utils import default_run_cache_dir\n", "\n", - "my_cache_dir = Path(\"~/pydra-cache\").expanduser()\n", + "my_cache_dir = Path(\"~/new-pydra-cache\").expanduser()\n", "my_cache_dir.mkdir(exist_ok=True)\n", "\n", "outputs = mrgrid(\n", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 921700879d..8f74334939 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,25 +1,47 @@ -from pydra.tasks.testing import UnsafeDivisionWorkflow -from pydra.engine.submitter import Submitter +from pathlib import Path +from tempfile import mkdtemp +from pprint import pprint +import json +from pydra.utils.hash import hash_function +from pydra.tasks.mrtrix3.v3_0 import MrGrid +from fileformats.medimage import Nifti1 -# This workflow will fail because we are trying to divide by 0 -wf = UnsafeDivisionWorkflow(a=10, b=5, denominator=2) +JSON_CONTENTS = {"a": True, "b": "two", "c": 3, "d": [7, 0.55, 6]} -if __name__ == "__main__": - with Submitter(worker="cf", rerun=True) as sub: - result = sub(wf) +test_dir = Path(mkdtemp()) +cache_root = Path(mkdtemp()) +json_file = test_dir / "test.json" +with open(json_file, "w") as f: + json.dump(JSON_CONTENTS, f) +nifti_dir = test_dir / "nifti" +nifti_dir.mkdir() -# from pydra.tasks.testing import UnsafeDivisionWorkflow -# from pydra.engine.submitter import Submitter +for i in range(10): + Nifti1.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. 
directory -# # This workflow will fail because we are trying to divide by 0 -# failing_workflow = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2, 0]) +niftis = list(nifti_dir.iterdir()) +pprint([hash_function(nifti) for nifti in niftis]) -# if __name__ == "__main__": -# with Submitter(worker="cf") as sub: -# result = sub(failing_workflow) +mrgrid_varying_vox_sizes = MrGrid(operation="regrid").split( + ("in_file", "voxel"), + in_file=niftis, + # Define a list of voxel sizes to resample the NIfTI files to, + # the list must be the same length as the list of NIfTI files + voxel=[ + (1.0, 1.0, 1.0), + (1.0, 1.0, 1.0), + (1.0, 1.0, 1.0), + (0.5, 0.5, 0.5), + (0.75, 0.75, 0.75), + (0.5, 0.5, 0.5), + (0.5, 0.5, 0.5), + (1.0, 1.0, 1.0), + (1.25, 1.25, 1.25), + (1.25, 1.25, 1.25), + ], +) -# if result.errored: -# print("Workflow failed with errors:\n" + str(result.errors)) -# else: -# print("Workflow completed successfully :)") +outputs = mrgrid_varying_vox_sizes(cache_dir=cache_root) + +pprint(outputs.out_file) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 97c35a5da4..f942674747 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -9,7 +9,7 @@ import subprocess as sp from contextlib import contextmanager import attr -from fileformats.core import FileSet +from fileformats.generic import FileSet from pydra.engine.helpers import is_lazy, attrs_values, list_fields @@ -77,6 +77,10 @@ def copy_nested_files( cache: ty.Dict[FileSet, FileSet] = {} + # Set to keep track of file paths that have already been copied + # to allow FileSet.copy to avoid name clashes + clashes_to_avoid = set() + def copy_fileset(fileset: FileSet): try: return cache[fileset] @@ -89,7 +93,15 @@ def copy_fileset(fileset: FileSet): MountIndentifier.on_same_mount(p, dest_dir) for p in fileset.fspaths ): supported -= FileSet.CopyMode.hardlink - copied = fileset.copy(dest_dir=dest_dir, supported_modes=supported, **kwargs) + cp_kwargs = {} + + cp_kwargs.update(kwargs) + copied = fileset.copy( + dest_dir=dest_dir, + supported_modes=supported, + avoid_clashes=clashes_to_avoid, # this prevents fname clashes between filesets + **kwargs, + ) cache[fileset] = copied return copied From 6f60ae154ff19d4e791ae8206c6c54aa6354d2c9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 09:03:08 +1100 Subject: [PATCH 213/342] debugged advanced execution tutorial --- .../tutorial/2-advanced-execution.ipynb | 152 +++++++++--------- new-docs/source/tutorial/tst.py | 68 +++----- pydra/engine/specs.py | 2 +- pydra/engine/submitter.py | 6 +- pydra/tasks/testing/__init__.py | 5 + 5 files changed, 109 insertions(+), 124 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index f7cacb4b7a..cae91f5d4a 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -11,7 +11,10 @@ "executed (e.g. on the cloud, on a HPC cluster, ...). 
This tutorial steps you through\n", "some of the available options for executing a task.\n", "\n", - "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)" + "[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nipype/pydra-tutorial/develop/notebooks/tutorial/advanced_execution.ipynb)\n", + "\n", + "Remember that before attempting to run multi-process code in Jupyter notebooks, the\n", + "following snippet must be called" ] }, { @@ -30,20 +33,8 @@ "source": [ "## Submitter\n", "\n", - "If you want to access a richer `Result` object you can use a Submitter object to execute the following task" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pydra.design import python\n", - "\n", - "@python.define\n", - "def TenToThePower(p: int) -> int:\n", - " return 10 ** p" + "If you want to access a richer `Result` object you can use a Submitter object to initiate\n", + "the task execution. For example, using the `TenToThePower` task from the testing package" ] }, { @@ -53,6 +44,8 @@ "outputs": [], "source": [ "from pydra.engine.submitter import Submitter\n", + "from pydra.tasks.testing import TenToThePower\n", + "\n", "\n", "ten_to_the_power = TenToThePower(p=3)\n", "\n", @@ -110,7 +103,11 @@ "class itself. Additional parameters can be passed to the worker initialisation as keyword\n", "arguments to the execution call. For example, if we wanted to run five tasks using the\n", "ConcurentFutures worker but only use three CPUs, we can pass `n_procs=3` to the execution\n", - "call." + "call.\n", + "\n", + "Remember that when calling multi-process code in a top level script the call must be\n", + "enclosed within a `if __name__ == \"__main__\"` block to allow the worker processes to\n", + "import the module without re-executing it." ] }, { @@ -119,14 +116,16 @@ "metadata": {}, "outputs": [], "source": [ - "from pydra.design import python\n", + "import tempfile\n", + "\n", + "cache_root = tempfile.mkdtemp()\n", "\n", "if __name__ == \"__main__\":\n", "\n", " ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5])\n", "\n", " # Run the 5 tasks in parallel split across 3 processes\n", - " outputs = ten_to_the_power(worker=\"cf\", n_procs=3)\n", + " outputs = ten_to_the_power(worker=\"cf\", n_procs=3, cache_dir=cache_root)\n", "\n", " p1, p2, p3, p4, p5 = outputs.out\n", "\n", @@ -168,7 +167,9 @@ "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", "go through some of the practicalities of this caching and hashing (see\n", "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", - "to consider)." 
+ "to consider).\n", + "\n", + "First we import the functions and classes we need andcreate some sample NIfTI files to work with" ] }, { @@ -179,37 +180,18 @@ "source": [ "from pathlib import Path\n", "import tempfile\n", + "from pprint import pprint\n", "from fileformats.medimage import Nifti1\n", "from pydra.engine.submitter import Submitter\n", "from pydra.tasks.mrtrix3.v3_0 import MrGrid\n", "\n", - "# Make directory filled with nifti files\n", + "# Make a temporary directory\n", "test_dir = Path(tempfile.mkdtemp())\n", "nifti_dir = test_dir / \"nifti\"\n", "nifti_dir.mkdir()\n", - "for i in range(10):\n", - " Nifti1.sample(nifti_dir, seed=i)\n", - "\n", - "# Instantiate the task definition, \"splitting\" over all NIfTI files in the test directory\n", - "# by splitting the \"input\" input field over all files in the directory\n", - "mrgrid = MrGrid(operation=\"regrid\", voxel=(0.5, 0.5, 0.5)).split(\n", - " in_file=nifti_dir.iterdir()\n", - ")\n", - "\n", - "# Run the task to resample all NIfTI files\n", - "outputs = mrgrid()\n", - "\n", - "# Create a new custom directory\n", - "cache_dir = test_dir / \"cache\"\n", - "cache_dir.mkdir()\n", - "\n", - "submitter = Submitter(cache_dir=cache_dir)\n", - "\n", - "# Run the task to resample all NIfTI files with different voxel sizes\n", - "with submitter:\n", - " result1 = submitter(mrgrid)\n", "\n", - "print(result1)" + "# Generate some random NIfTI files to work with\n", + "nifti_files = [Nifti1.sample(nifti_dir, seed=i) for i in range(10)]" ] }, { @@ -243,21 +225,20 @@ "\n", "mrgrid_varying_vox = MrGrid(operation=\"regrid\").split(\n", " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", + " in_file=nifti_files,\n", " voxel=VOX_SIZES,\n", ")\n", "\n", "submitter = Submitter(cache_dir=test_dir / \"cache\")\n", "\n", "\n", - "# Result from previous run is reused as the task and inputs are identical\n", "with submitter:\n", " result1 = submitter(mrgrid_varying_vox)\n", "\n", "\n", "mrgrid_varying_vox2 = MrGrid(operation=\"regrid\").split(\n", " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", + " in_file=nifti_files,\n", " voxel=copy(VOX_SIZES),\n", ")\n", "\n", @@ -298,30 +279,30 @@ "outputs": [], "source": [ "# Rename a NIfTI file within the test directory\n", - "first_file = next(nifti_dir.iterdir())\n", - "new_name = first_file.with_name(\"first.nii\")\n", - "first_file.rename(new_name)\n", + "nifti_files[0] = Nifti1(\n", + " nifti_files[0].fspath.rename(nifti_files[0].fspath.with_name(\"first.nii\"))\n", + ")\n", "\n", "mrgrid_varying_vox3 = MrGrid(operation=\"regrid\").split(\n", " (\"in_file\", \"voxel\"),\n", - " in_file=nifti_dir.iterdir(),\n", + " in_file=nifti_files,\n", " voxel=VOX_SIZES,\n", ")\n", "\n", - "# Result from previous run is reused as the task and inputs are identical\n", + "# Result from previous run is reused as contents of the files have not changed, despite\n", + "# the file names changing\n", "with submitter:\n", - " result3 = submitter(mrgrid_varying_vox3)\n", + " result4 = submitter(mrgrid_varying_vox3)\n", "\n", - "assert result3.output_dir == result1.output_dir\n", + "assert result4.output_dir == result1.output_dir\n", "\n", "# Replace the first NIfTI file with a new file\n", - "new_name.unlink()\n", - "Nifti1.sample(nifti_dir, seed=100)\n", + "nifti_files[0] = Nifti1.sample(nifti_dir, seed=100)\n", "\n", "# Update the in_file input field to include the new file\n", "mrgrid_varying_vox4 = MrGrid(operation=\"regrid\").split(\n", " (\"in_file\", \"voxel\"),\n", - " 
in_file=nifti_dir.iterdir(),\n", + " in_file=nifti_files,\n", " voxel=VOX_SIZES,\n", ")\n", "\n", @@ -333,19 +314,14 @@ "assert result4.output_dir != result1.output_dir" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Environments\n", + "## Environments and hooks\n", "\n", "For shell tasks, it is possible to specify that the command runs within a specific\n", - "software environment, such as those provided by software containers (e.g. Docker or Apptainer).\n", + "software environment, such as those provided by software containers (e.g. Docker or Singularity/Apptainer).\n", "This is down by providing the environment to the submitter/execution call," ] }, @@ -371,7 +347,7 @@ "outputs = mrgrid(environment=Docker(image=\"mrtrix3/mrtrix3\", tag=\"latest\"))\n", "\n", "# Print the locations of the output files\n", - "print(\"\\n\".join(str(p) for p in outputs.out_file))" + "pprint(outputs.out_file)" ] }, { @@ -381,31 +357,61 @@ "Of course for this to work Docker needs to work and be configured for\n", "[sudo-less execution](https://docs.docker.com/engine/install/linux-postinstall/).\n", "See [Containers and Environments](../explanation/environments.rst) for more details on\n", - "how to utilise containers and add support for other software environments." + "how to utilise containers and add support for other software environments.\n", + "\n", + "It is also possible to specify functions to run at hooks that are immediately before and after\n", + "the task is executed by passing a `pydra.engine.spec.TaskHooks` object to the `hooks`\n", + "keyword arg. The callable should take the `pydra.engine.core.Task` object as its only\n", + "argument and return None. The available hooks to attach functions are:\n", + "\n", + "* pre_run: before the task cache directory is created\n", + "* pre_run_task: after the cache directory has been created and the inputs resolved but before the task is executed\n", + "* post_run_task: after the task has been run and the outputs collected\n", + "* post_run: after the cache directory has been finalised\n", + "\n", + "\n", + "QUESTION: What are these hooks intended for? Should the post_run_task hook be run before the outputs have been\n", + "collected?" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Provenance and auditing\n", + "from pydra.engine.core import Task\n", + "from pydra.engine.specs import TaskHooks, Result\n", + "import os\n", + "import platform\n", + "\n", + "def notify_task_completion(task: Task, result: Result):\n", + " # Print a message to the terminal\n", + " print(f\"Task completed! Results are stored in {str(task.output_dir)!r}\")\n", + "\n", + " # Platform-specific notifications\n", + " if platform.system() == \"Darwin\": # macOS\n", + " os.system('osascript -e \\'display notification \"Task has completed successfully!\" with title \"Task Notification\"\\'')\n", + " elif platform.system() == \"Linux\": # Linux\n", + " os.system('notify-send \"Task Notification\" \"Task has completed successfully!\"')\n", + " elif platform.system() == \"Windows\": # Windows\n", + " os.system('msg * \"Task has completed successfully!\"')\n", "\n", - "Work in progress..." 
+ "# Run the task to resample all NIfTI files\n", + "outputs = mrgrid(hooks=TaskHooks(post_run=notify_task_completion), cache_dir=tempfile.mkdtemp())\n", + "\n", + "# Print the locations of the output files\n", + "pprint(outputs.out_file)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Hooks\n", + "## Provenance and auditing\n", "\n", "Work in progress..." ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 8f74334939..2b27ac6385 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,47 +1,21 @@ -from pathlib import Path -from tempfile import mkdtemp -from pprint import pprint -import json -from pydra.utils.hash import hash_function -from pydra.tasks.mrtrix3.v3_0 import MrGrid -from fileformats.medimage import Nifti1 - -JSON_CONTENTS = {"a": True, "b": "two", "c": 3, "d": [7, 0.55, 6]} - -test_dir = Path(mkdtemp()) -cache_root = Path(mkdtemp()) -json_file = test_dir / "test.json" -with open(json_file, "w") as f: - json.dump(JSON_CONTENTS, f) - -nifti_dir = test_dir / "nifti" -nifti_dir.mkdir() - -for i in range(10): - Nifti1.sample(nifti_dir, seed=i) # Create a dummy NIfTI file in the dest. directory - -niftis = list(nifti_dir.iterdir()) -pprint([hash_function(nifti) for nifti in niftis]) - -mrgrid_varying_vox_sizes = MrGrid(operation="regrid").split( - ("in_file", "voxel"), - in_file=niftis, - # Define a list of voxel sizes to resample the NIfTI files to, - # the list must be the same length as the list of NIfTI files - voxel=[ - (1.0, 1.0, 1.0), - (1.0, 1.0, 1.0), - (1.0, 1.0, 1.0), - (0.5, 0.5, 0.5), - (0.75, 0.75, 0.75), - (0.5, 0.5, 0.5), - (0.5, 0.5, 0.5), - (1.0, 1.0, 1.0), - (1.25, 1.25, 1.25), - (1.25, 1.25, 1.25), - ], -) - -outputs = mrgrid_varying_vox_sizes(cache_dir=cache_root) - -pprint(outputs.out_file) +from pydra.design import python +import shutil + + +@python.define +def TenToThePower(p: int) -> int: + return 10**p + + +if __name__ == "__main__": + + shutil.rmtree("/Users/tclose/Library/Caches/pydra/0.25.dev190+g6a726571/run-cache") + + ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) + + # Run the 5 tasks in parallel split across 3 processes + outputs = ten_to_the_power(worker="cf", n_procs=3) + + p1, p2, p3, p4, p5 = outputs.out + + print(f"10^5 = {p5}") diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 146959aada..ff0551df69 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -261,7 +261,7 @@ def __call__( raise if result.errored: if isinstance(self, WorkflowDef) or self._splitter: - raise RuntimeError(f"Workflow {self} failed with errors:") + raise RuntimeError(f"Workflow {self} failed with errors") else: errors = result.errors raise RuntimeError( diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 82ae8d8e9e..b8075f6c8c 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -114,9 +114,9 @@ def __init__( ) if cache_dir is None: cache_dir = default_run_cache_dir - cache_dir.mkdir(parents=True, exist_ok=True) - elif not cache_dir.exists(): - raise ValueError(f"Cache directory {str(cache_dir)!r} does not exist") + cache_dir = Path(cache_dir).resolve() + cache_dir.mkdir(parents=True, exist_ok=True) + self.cache_dir = cache_dir self.cache_locations = cache_locations self.environment = environment diff --git a/pydra/tasks/testing/__init__.py b/pydra/tasks/testing/__init__.py index cff67a927c..e033eb134f 100644 --- 
a/pydra/tasks/testing/__init__.py +++ b/pydra/tasks/testing/__init__.py @@ -75,3 +75,8 @@ def SafeDivisionWorkflow(a: float, b: float, denominator: float) -> float: divide = workflow.add(SafeDivide(x=add.out, y=denominator)) subtract = workflow.add(Subtract(x=divide.out, y=b)) return subtract.out + + +@python.define +def TenToThePower(p: int) -> int: + return 10**p From 6237368ff8d76f8af113190e91946ea8cd58798d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 09:30:44 +1100 Subject: [PATCH 214/342] fixed up docs cd --- .github/workflows/docs.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f338d093aa..76c8da2ef2 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -46,6 +46,10 @@ jobs: build-new: runs-on: ubuntu-latest + # Set up the environment so that it finds conda + defaults: + run: + shell: bash -l {0} steps: - uses: actions/checkout@v4 - name: Fetch tags From 072fae68d804718b49400ece672efd466a784c1e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 10:45:01 +1100 Subject: [PATCH 215/342] debugging tutorials --- .../tutorial/2-advanced-execution.ipynb | 32 ++---- .../source/tutorial/3-troubleshooting.ipynb | 26 ++--- new-docs/source/tutorial/5-shell.ipynb | 100 +++++++++--------- new-docs/source/tutorial/6-workflow.ipynb | 17 ++- .../source/tutorial/7-canonical-form.ipynb | 16 ++- new-docs/source/tutorial/tst.py | 61 ++++++++--- pydra/design/base.py | 25 ++--- pydra/design/boutiques.py | 4 +- pydra/design/shell.py | 23 ++-- pydra/design/workflow.py | 16 ++- pydra/engine/core.py | 12 ++- pydra/engine/helpers.py | 4 +- pydra/engine/node.py | 3 +- pydra/engine/specs.py | 8 +- pydra/engine/submitter.py | 2 + pydra/tasks/testing/__init__.py | 2 +- 16 files changed, 214 insertions(+), 137 deletions(-) diff --git a/new-docs/source/tutorial/2-advanced-execution.ipynb b/new-docs/source/tutorial/2-advanced-execution.ipynb index cae91f5d4a..7765e71069 100644 --- a/new-docs/source/tutorial/2-advanced-execution.ipynb +++ b/new-docs/source/tutorial/2-advanced-execution.ipynb @@ -167,9 +167,15 @@ "as long as exactly the hashes of the inputs provided to the task are the same. Here we\n", "go through some of the practicalities of this caching and hashing (see\n", "[Caches and hashes](../explanation/hashing-caching.html) for more details and issues\n", - "to consider).\n", - "\n", - "First we import the functions and classes we need andcreate some sample NIfTI files to work with" + "to consider)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we attempt to run the same task with the same parameterisation the cache directory\n", + "will point to the same location and the results will be reused" ] }, { @@ -178,6 +184,7 @@ "metadata": {}, "outputs": [], "source": [ + "from copy import copy\n", "from pathlib import Path\n", "import tempfile\n", "from pprint import pprint\n", @@ -191,24 +198,7 @@ "nifti_dir.mkdir()\n", "\n", "# Generate some random NIfTI files to work with\n", - "nifti_files = [Nifti1.sample(nifti_dir, seed=i) for i in range(10)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "If we attempt to run the same task with the same parameterisation the cache directory\n", - "will point to the same location and the results will be reused" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from copy import copy\n", + "nifti_files = [Nifti1.sample(nifti_dir, seed=i) for i in range(10)]\n", "\n", "VOX_SIZES = [\n", " (0.5, 0.5, 0.5),\n", diff --git a/new-docs/source/tutorial/3-troubleshooting.ipynb b/new-docs/source/tutorial/3-troubleshooting.ipynb index 8020d3a3a7..2930c33d8c 100644 --- a/new-docs/source/tutorial/3-troubleshooting.ipynb +++ b/new-docs/source/tutorial/3-troubleshooting.ipynb @@ -129,13 +129,11 @@ "wf = UnsafeDivisionWorkflow(a=10, b=5).split(denominator=[3, 2 ,0])\n", "\n", "if __name__ == \"__main__\":\n", - " with Submitter(worker=\"cf\") as sub:\n", - " result = sub(wf)\n", - " \n", - "if result.errored:\n", - " print(\"Workflow failed with errors:\\n\" + str(result.errors))\n", - "else:\n", - " print(\"Workflow completed successfully :)\")" + " try:\n", + " with Submitter(worker=\"cf\") as sub:\n", + " result = sub(wf)\n", + " except Exception as e:\n", + " print(e)\n" ] }, { @@ -153,12 +151,15 @@ "metadata": {}, "outputs": [], "source": [ + "from pydra.utils import default_run_cache_dir\n", "import cloudpickle as cp\n", + "from pprint import pprint\n", + "from pydra.tasks.testing import Divide\n", "\n", - "with open(\"/`" + "Command line flags can also be added to the shell template, either the single or double hyphen form.\n", + "The field template name immediately following the flag will be associate with that flag.\n", + "If there is no space between the flag and the field template, then the field is assumed\n", + "to be a boolean, otherwise it is assumed to be of type string unless otherwise specified.\n", + "\n", + "If a field is optional, the field template should end with a `?`. Tuple fields are\n", + "specified by comma separated types. The ellipsis (`...`) can signify tuple types with\n", + "variable number of items. Arguments and options that can be repeated are specified by\n", + "appending a `+` (at least one must be provided) or `*` (defaults to empty list). Note that\n", + "for options, this signifies that the flag itself is printed multiple times. e.g.\n", + "`my-command --multi-opt 1 2 --multi-opt 1 5`." 
] }, { @@ -139,14 +143,12 @@ "from pydra.engine.helpers import fields_dict\n", "\n", "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + ")\n", "\n", "pprint(fields_dict(Cp))\n", "pprint(fields_dict(Cp.Outputs))" @@ -168,14 +170,12 @@ "outputs": [], "source": [ "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " )\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + ")\n", "\n", "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" ] @@ -196,24 +196,24 @@ "outputs": [], "source": [ "Cp = shell.define(\n", - " (\n", - " \"cp \"\n", - " \"-R \"\n", - " \"--text-arg \"\n", - " \"--int-arg \"\n", - " \"--tuple-arg \"\n", - " ),\n", - " inputs={\"recursive\": shell.arg(\n", - " help=(\n", - " \"If source_file designates a directory, cp copies the directory and \"\n", - " \"the entire subtree connected at that point.\"\n", - " )\n", - " )},\n", - " outputs={\n", - " \"out_dir\": shell.outarg(position=-2),\n", - " \"out_file\": shell.outarg(position=-1),\n", - " },\n", - " )\n", + " (\n", + " \"cp \"\n", + " \"-R \"\n", + " \"--text-arg \"\n", + " \"--int-arg \"\n", + " \"--tuple-arg \"\n", + " ),\n", + " inputs={\"recursive\": shell.arg(\n", + " help=(\n", + " \"If source_file designates a directory, cp copies the directory and \"\n", + " \"the entire subtree connected at that point.\"\n", + " )\n", + " )},\n", + " outputs={\n", + " \"out_dir\": shell.outarg(position=-2),\n", + " \"out_file\": shell.outarg(position=-1),\n", + " },\n", + ")\n", "\n", "\n", "pprint(fields_dict(Cp))\n", @@ -277,20 +277,16 @@ "* *name of an input*: the name of any of the input arguments to the task, including output args that are part of the command line (i.e. output files)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, "source": [ "To make workflows that use the interface type-checkable, the canonical form of a shell\n", "task dataclass should inherit from `shell.Def` parameterized by its nested Outputs class,\n", - "and the `Outputs` nested class should inherit from `shell.Outputs`." + "and the `Outputs` nested class should inherit from `shell.Outputs`. Arguments that are\n", + "provided None values are not included in the command line, so optional arguments should\n", + "be typed as one of these equivalent forms `ty.Union[T, None]`, `ty.Optional[T]` or `T | None`\n", + "and have a default of `None`." 
] }, { @@ -300,6 +296,8 @@ "outputs": [], "source": [ "from pydra.engine.specs import ShellDef, ShellOutputs\n", + "from pydra.utils.typing import MultiInputObj\n", + "from fileformats.generic import FsObject, Directory\n", "\n", "@shell.define\n", "class Cp(ShellDef[\"Cp.Outputs\"]):\n", @@ -309,8 +307,8 @@ " in_fs_objects: MultiInputObj[FsObject]\n", " recursive: bool = shell.arg(argstr=\"-R\", default=False)\n", " text_arg: str = shell.arg(argstr=\"--text-arg\")\n", - " int_arg: int | None = shell.arg(argstr=\"--int-arg\")\n", - " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\")\n", + " int_arg: int | None = shell.arg(argstr=\"--int-arg\", default=None)\n", + " tuple_arg: tuple[int, str] | None = shell.arg(argstr=\"--tuple-arg\", default=None)\n", "\n", " @shell.outputs\n", " class Outputs(ShellOutputs):\n", diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index 6648f8b3e0..8ca4313e45 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -437,15 +437,13 @@ "import tempfile\n", "import numpy as np\n", "from fileformats.medimage import Nifti1\n", - "from fileformats.medimage_mrtrix3 import (\n", - " ImageFormat as Mif, ImageHeader as Mih, ImageDataFile as Mid\n", - ")\n", + "import fileformats.medimage_mrtrix3 as mrtrix3\n", "from pydra.engine.environments import Docker\n", "from pydra.design import workflow, python\n", "from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold\n", "\n", "@workflow.define(outputs=[\"out_image\"])\n", - "def ToyMedianThreshold(in_image: Nifti1) -> Mif:\n", + "def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat:\n", " \"\"\"A toy example workflow that\n", "\n", " * converts a NIfTI image to MRTrix3 image format with a separate header\n", @@ -458,21 +456,22 @@ " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", " )\n", "\n", + "\n", " @python.define\n", - " def SelectDataFile(in_file: Mih) -> Mid:\n", + " def SelectDataFile(in_file: mrtrix3.ImageHeader) -> mrtrix3.ImageDataFile:\n", " return in_file.data_file\n", "\n", - " select_data_file = workflow.add(SelectDataFile(in_file=input_conversion.out_file))\n", + " select_data = workflow.add(SelectDataFile(in_file=input_conversion.out_file))\n", "\n", " @python.define\n", - " def Median(data_file: Mid) -> float:\n", + " def Median(data_file: mrtrix3.ImageDataFile) -> float:\n", " data = np.load(data_file)\n", " return np.median(data)\n", "\n", - " median = workflow.add(Median(data_file=select_data_file.out))\n", + " median = workflow.add(Median(data_file=select_data.out))\n", " threshold = workflow.add(\n", " MrThreshold(\n", - " in_file=select_data_file.out,\n", + " in_file=in_image,\n", " abs=median.out\n", " ), \n", " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"\")\n", diff --git a/new-docs/source/tutorial/7-canonical-form.ipynb b/new-docs/source/tutorial/7-canonical-form.ipynb index 7e3ae1229d..d270949a1d 100644 --- a/new-docs/source/tutorial/7-canonical-form.ipynb +++ b/new-docs/source/tutorial/7-canonical-form.ipynb @@ -239,8 +239,22 @@ } ], "metadata": { + "kernelspec": { + "display_name": "wf12", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git 
a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index 2b27ac6385..c894f26f7a 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,21 +1,58 @@ -from pydra.design import python -import shutil +import tempfile +import numpy as np +from fileformats.medimage import Nifti1 +import fileformats.medimage_mrtrix3 as mrtrix3 +from pydra.engine.environments import Docker +from pydra.design import workflow, python +from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold -@python.define -def TenToThePower(p: int) -> int: - return 10**p +@workflow.define(outputs=["out_image"]) +def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat: + """A toy example workflow that + * converts a NIfTI image to MRTrix3 image format with a separate header + * loads the separate data file and selects the median value + """ -if __name__ == "__main__": + input_conversion = workflow.add( + MrConvert(in_file=in_image, out_file="out_file.mih"), + name="input_conversion", + environment=Docker("mrtrix3/mrtrix3", tag="latest"), + ) - shutil.rmtree("/Users/tclose/Library/Caches/pydra/0.25.dev190+g6a726571/run-cache") + @python.define + def SelectDataFile(in_file: mrtrix3.ImageHeader) -> mrtrix3.ImageDataFile: + return in_file.data_file - ten_to_the_power = TenToThePower().split(p=[1, 2, 3, 4, 5]) + select_data = workflow.add(SelectDataFile(in_file=input_conversion.out_file)) - # Run the 5 tasks in parallel split across 3 processes - outputs = ten_to_the_power(worker="cf", n_procs=3) + @python.define + def Median(data_file: mrtrix3.ImageDataFile) -> float: + data = np.load(data_file) + return np.median(data) - p1, p2, p3, p4, p5 = outputs.out + median = workflow.add(Median(data_file=select_data.out)) + threshold = workflow.add( + MrThreshold(in_file=in_image, abs=median.out), + environment=Docker("mrtrix3/mrtrix3", tag=""), + ) - print(f"10^5 = {p5}") + output_conversion = workflow.add( + MrConvert(in_file=threshold.out_file, out_file="out_image.mif"), + name="output_conversion", + environment=Docker("mrtrix3/mrtrix3", tag="latest"), + ) + + return output_conversion.out_file + + +test_dir = tempfile.mkdtemp() + +nifti_file = Nifti1.sample(test_dir, seed=0) + +wf = ToyMedianThreshold(in_image=nifti_file) + +outputs = wf() + +print(outputs) diff --git a/pydra/design/base.py b/pydra/design/base.py index 79c4eaf2b2..22ab9ebedb 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -41,21 +41,21 @@ class _Empty(enum.Enum): - EMPTY = enum.auto() + NO_DEFAULT = enum.auto() def __repr__(self): - return "EMPTY" + return "NO_DEFAULT" def __bool__(self): return False -EMPTY = _Empty.EMPTY # To provide a blank placeholder for the default field +NO_DEFAULT = _Empty.NO_DEFAULT # To provide a blank placeholder for the default field def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: """Ensure the default value has been coerced into the correct type""" - if value is EMPTY or isinstance(value, attrs.Factory): + if value is NO_DEFAULT or isinstance(value, attrs.Factory): return value if self_.type is ty.Callable and isinstance(value, ty.Callable): return value @@ -168,7 +168,7 @@ class Field: The type of the field, by default it is Any from name to field, by default it is None default : Any, optional - the default value for the field, by default it is EMPTY + the default value for the field, by default it is NO_DEFAULT help: str, optional A short description of the input field. 
requires: str | list[str | list[str] | Requirement], optional @@ -189,7 +189,8 @@ class Field: validator=is_type, default=ty.Any, converter=default_if_none(ty.Any) ) default: ty.Any = attrs.field( - default=EMPTY, converter=attrs.Converter(convert_default_value, takes_self=True) + default=NO_DEFAULT, + converter=attrs.Converter(convert_default_value, takes_self=True), ) help: str = "" requires: list[RequirementSet] = attrs.field( @@ -205,7 +206,7 @@ def requirements_satisfied(self, inputs: "TaskDef") -> bool: @property def mandatory(self): - return self.default is EMPTY + return self.default is NO_DEFAULT @attrs.define(kw_only=True) @@ -220,7 +221,7 @@ class Arg(Field): type: type, optional The type of the field, by default it is Any default : Any, optional - the default value for the field, by default it is EMPTY + the default value for the field, by default it is NO_DEFAULT help: str A short description of the input field. allowed_values: list, optional @@ -262,7 +263,7 @@ class Out(Field): type: type, optional The type of the field, by default it is Any default : Any, optional - the default value for the field, by default it is EMPTY + the default value for the field, by default it is NO_DEFAULT help: str, optional A short description of the input field. requires: list, optional @@ -831,7 +832,7 @@ def extract_function_inputs_and_outputs( for inpt_name, default in input_defaults.items(): inpt = inputs[inpt_name] if isinstance(inpt, arg_type): - if inpt.default is EMPTY: + if inpt.default is NO_DEFAULT: inpt.default = default elif inspect.isclass(inpt) or ty.get_origin(inpt): inputs[inpt_name] = arg_type(type=inpt, default=default) @@ -989,7 +990,7 @@ def _get_attrs_kwargs(field: Field) -> dict[str, ty.Any]: kwargs = {} if not hasattr(field, "default"): kwargs["factory"] = nothing_factory - elif field.default is not EMPTY: + elif field.default is not NO_DEFAULT: kwargs["default"] = field.default elif is_optional(field.type): kwargs["default"] = None @@ -1005,7 +1006,7 @@ def nothing_factory(): def set_none_default_if_optional(field: Field) -> None: - if is_optional(field.type) and field.default is EMPTY: + if is_optional(field.type) and field.default is NO_DEFAULT: field.default = None diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 8c1986c983..9a48edfd17 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -21,7 +21,7 @@ class arg(shell.arg): type: type, optional The type of the field, by default it is Any default : Any, optional - the default value for the field, by default it is EMPTY + the default value for the field, by default it is NO_DEFAULT help: str A short description of the input field. allowed_values: list, optional @@ -55,7 +55,7 @@ class out(shell.out): type: type, optional The type of the field, by default it is Any default : Any, optional - the default value for the field, by default it is EMPTY + the default value for the field, by default it is NO_DEFAULT help: str, optional A short description of the input field. 
requires: list, optional diff --git a/pydra/design/shell.py b/pydra/design/shell.py index cecf61e59e..6f6cde3441 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -20,7 +20,7 @@ extract_fields_from_class, ensure_field_objects, make_task_def, - EMPTY, + NO_DEFAULT, ) from pydra.utils.typing import is_fileset_or_union, MultiInputObj @@ -211,7 +211,7 @@ class outarg(arg, Out): @path_template.validator def _validate_path_template(self, attribute, value): - if value and self.default not in (EMPTY, True, None): + if value and self.default not in (NO_DEFAULT, True, None): raise ValueError( f"path_template ({value!r}) can only be provided when no default " f"({self.default!r}) is provided" @@ -512,7 +512,7 @@ def parse_command_line_template( if not args_str: return executable, inputs, outputs tokens = re.split(r"\s+", args_str.strip()) - arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+]+(?:\?|=[^>]+)?)>" + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+\*]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) @@ -568,7 +568,7 @@ def from_type_str(type_str) -> type: ) from None types.append(type_) if len(types) == 2 and types[1] == "...": - type_ = MultiInputObj[types[0]] + type_ = tuple[types[0], ...] elif len(types) > 1: type_ = tuple[*types] else: @@ -590,21 +590,32 @@ def from_type_str(type_str) -> type: field_type = arg # Identify type after ':' symbols kwds = {} + is_multi = False + optional = False if name.endswith("?"): assert "=" not in name name = name[:-1] optional = True kwds["default"] = None + elif name.endswith("+"): + is_multi = True + name = name[:-1] + elif name.endswith("*"): + is_multi = True + name = name[:-1] + kwds["default"] = attrs.Factory(list) elif "=" in name: name, default = name.split("=") kwds["default"] = eval(default) - else: - optional = False if ":" in name: name, type_str = name.split(":") type_ = from_type_str(type_str) + if ty.get_origin(type_) is tuple: + kwds["sep"] = " " else: type_ = generic.FsObject if option is None else str + if is_multi: + type_ = MultiInputObj[type_] if optional: type_ |= None # Make the arguments optional kwds["type"] = type_ diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 68dfcc37d9..e9d5af3ec2 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -16,6 +16,8 @@ if ty.TYPE_CHECKING: from pydra.engine.core import Workflow from pydra.engine.specs import TaskDef, TaskOutputs, WorkflowDef + from pydra.engine.environments import Environment + from pydra.engine.specs import TaskHooks __all__ = ["define", "add", "this", "arg", "out"] @@ -206,7 +208,12 @@ def this() -> "Workflow": OutputsType = ty.TypeVar("OutputsType", bound="TaskOutputs") -def add(task_def: "TaskDef[OutputsType]", name: str = None) -> OutputsType: +def add( + task_def: "TaskDef[OutputsType]", + name: str | None = None, + environment: "Environment | None" = None, + hooks: "TaskHooks | None" = None, +) -> OutputsType: """Add a node to the workflow currently being constructed Parameters @@ -216,13 +223,18 @@ def add(task_def: "TaskDef[OutputsType]", name: str = None) -> OutputsType: name : str, optional The name of the node, by default it will be the name of the task definition class + environment : Environment, optional + The environment to run the task in, such as the Docker or Singularity container, + by default it will be the "native" + hooks : TaskHooks, optional + The hooks to run before or after the task, by default no hooks will be run Returns ------- Outputs 
The outputs definition of the node """ - return this().add(task_def, name=name) + return this().add(task_def, name=name, environment=environment, hooks=hooks) U = ty.TypeVar("U") diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 6e33972e17..b897211dc9 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -744,6 +744,7 @@ def add( task_def: TaskDef[OutputsType], name: str | None = None, environment: Environment | None = None, + hooks: TaskHooks | None = None, ) -> OutputsType: """Add a node to the workflow @@ -754,6 +755,11 @@ def add( name : str, optional The name of the node, by default it will be the name of the task definition class + environment : Environment, optional + The environment to run the task in, such as the Docker or Singularity container, + by default it will be the "native" + hooks : TaskHooks, optional + The hooks to run before or after the task, by default no hooks will be run Returns ------- @@ -770,7 +776,11 @@ def add( f"{task_def._task_type!r} tasks ({task_def!r})" ) node = Node[OutputsType]( - name=name, definition=task_def, workflow=self, environment=environment + name=name, + definition=task_def, + workflow=self, + environment=environment, + hooks=hooks, ) self._nodes[name] = node return node.lzout diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 5b89f6a31c..4e0b3900e7 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -80,7 +80,7 @@ def from_list_if_single(obj: ty.Any) -> ty.Any: def print_help(defn: "TaskDef[DefType]") -> list[str]: """Visit a task object and print its input/output interface.""" - from pydra.design.base import EMPTY + from pydra.design.base import NO_DEFAULT lines = [f"Help for {defn.__class__.__name__}"] if list_fields(defn): @@ -91,7 +91,7 @@ def print_help(defn: "TaskDef[DefType]") -> list[str]: ): continue default = "" - if f.default is not EMPTY and not f.name.startswith("_"): + if f.default is not NO_DEFAULT and not f.name.startswith("_"): default = f" (default: {f.default})" try: name = f.type.__name__ diff --git a/pydra/engine/node.py b/pydra/engine/node.py index cd238d6866..2598a08fb6 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -17,7 +17,7 @@ if ty.TYPE_CHECKING: from .core import Workflow from .environments import Environment - from pydra.engine.specs import TaskDef, TaskOutputs + from pydra.engine.specs import TaskDef, TaskOutputs, TaskHooks OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") @@ -43,6 +43,7 @@ class Node(ty.Generic[OutputType]): name: str _definition: "TaskDef[OutputType]" _environment: "Environment | None" = None + _hooks: "TaskHooks | None" = None _workflow: "Workflow" = attrs.field(default=None, eq=False, hash=False, repr=False) _lzout: OutputType | None = attrs.field( init=False, default=None, eq=False, hash=False, repr=False diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index ff0551df69..affe44063d 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -33,7 +33,7 @@ from . 
import lazy from pydra.utils.hash import hash_function, Cache from pydra.utils.typing import StateArray, MultiInputObj -from pydra.design.base import Field, Arg, Out, RequirementSet, EMPTY +from pydra.design.base import Field, Arg, Out, RequirementSet, NO_DEFAULT from pydra.design import shell from pydra.engine.lazy import LazyInField, LazyOutField @@ -52,7 +52,7 @@ def is_set(value: ty.Any) -> bool: """Check if a value has been set.""" - return value not in (attrs.NOTHING, EMPTY) + return value not in (attrs.NOTHING, NO_DEFAULT) @attrs.define(kw_only=True, auto_attribs=False, eq=False) @@ -88,9 +88,9 @@ def _get_node(self): f"{self} outputs object is not a lazy output of a workflow node" ) from None - def __iter__(self) -> list[str]: + def __iter__(self) -> ty.Generator[str, None, None]: """The names of the fields in the output object""" - return sorted(f.name for f in attrs_fields(self)) + return iter(sorted(f.name for f in attrs_fields(self))) def __getitem__(self, name_or_index: str | int) -> ty.Any: """Return the value for the given attribute diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index b8075f6c8c..07ba42b063 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -576,6 +576,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: ), submitter=self.submitter, environment=self.node._environment, + hooks=self.node._hooks, name=self.node.name, ) else: @@ -589,6 +590,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: submitter=self.submitter, environment=self.node._environment, name=self.node.name, + hooks=self.node._hooks, state_index=index, ) diff --git a/pydra/tasks/testing/__init__.py b/pydra/tasks/testing/__init__.py index e033eb134f..e0aa35669a 100644 --- a/pydra/tasks/testing/__init__.py +++ b/pydra/tasks/testing/__init__.py @@ -14,7 +14,7 @@ def Divide(x: float, y: float) -> float: @python.define def SafeDivide(x: float, y: float) -> float: if y == 0: - return float("inf") + return float("nan") return x / y From 9481f30497303b986a636311959e9f2eaa37b14c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 11:56:56 +1100 Subject: [PATCH 216/342] added medimage-extras to tutorial deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index d106c0dd97..a79406f42c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ doc = [ "scipy", "fileformats-extras >= v0.15.0a3", "fileformats-medimage >= v0.10.0a", + "fileformats-medimage-extras >= v0.10.0a", "pydra-mrtrix3 >=3.0.4a14", "sphinx_rtd_theme", "sphinx-click", From b0b45b90911ecf281714a2a3320b288d39b657ff Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 17:12:41 +1100 Subject: [PATCH 217/342] re-implementing docker/singularity environments after refactor --- new-docs/source/tutorial/6-workflow.ipynb | 1 - pydra/engine/core.py | 1 + pydra/engine/environments.py | 4 +-- pydra/engine/specs.py | 34 ++++++++++------------- 4 files changed, 18 insertions(+), 22 deletions(-) diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index 8ca4313e45..ffbee4d14e 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -456,7 +456,6 @@ " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", " )\n", "\n", - "\n", " @python.define\n", " def SelectDataFile(in_file: mrtrix3.ImageHeader) -> mrtrix3.ImageDataFile:\n", " return in_file.data_file\n", diff --git a/pydra/engine/core.py 
b/pydra/engine/core.py index b897211dc9..c49620e61b 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -90,6 +90,7 @@ class Task(ty.Generic[DefType]): submitter: "Submitter | None" environment: "Environment | None" state_index: state.StateIndex + bindings: dict[str, ty.Any] | None = None # Bindings for the task environment _inputs: dict[str, ty.Any] | None = None diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 06fd3fdefe..0a08f1453c 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -94,7 +94,7 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.definition._get_bindings(root=self.root) + mounts, inputs_mod_root = task.definition._get_bindings(root=self.root) docker_args = [ "docker", @@ -129,7 +129,7 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts = task.definition._get_bindings(root=self.root) + mounts, inputs_mod_root = task.definition._get_bindings(root=self.root) # todo adding xargsy etc singularity_args = [ diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index affe44063d..13a65b1bae 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1205,7 +1205,9 @@ def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: cmd_el_str = "" return split_cmd(cmd_el_str) - def _get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: + def _get_bindings( + self, root: str | None = None + ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: """Return bindings necessary to run task in an alternative root. This is primarily intended for contexts when a task is going @@ -1213,30 +1215,24 @@ def _get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: Arguments --------- - root: str + root: str, optional + Returns ------- bindings: dict Mapping from paths in the host environment to the target environment """ - + bindings: dict[str, tuple[str, str]] = {} + inputs_mod_root: dict[str, tuple[Path, ...]] = {} if root is None: - return {} - else: - self._prepare_bindings(root=root) - return self.bindings - - def _prepare_bindings(self, root: str): - """Prepare input files to be passed to the task - - This updates the ``bindings`` attribute of the current task to make files available - in an ``Environment``-defined ``root``. 
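To make that binding logic concrete, here is a small self-contained illustration (not part of the patch; the path and field name are invented) of the host-to-container translation performed for a file input when a task runs under a container root such as /mnt/pydra:

from pathlib import Path

root = "/mnt/pydra"
host_file = Path("/home/me/data/t1w.nii.gz")  # hypothetical file-set input
host_dir = host_file.parent

# Same translation as in the patch: mount the file's parent directory and point
# the command at the corresponding location inside the container root.
env_dir = Path(f"{root}{host_dir}")
bindings = {host_dir: (env_dir, "ro")}        # read-only unless the field requests a copy
in_container_path = env_dir / host_file.name

print(bindings)           # {PosixPath('/home/me/data'): (PosixPath('/mnt/pydra/home/me/data'), 'ro')}
print(in_container_path)  # /mnt/pydra/home/me/data/t1w.nii.gz

In other words, the parent directory of each file input becomes a mount, and the command line is rewritten to use the path under the container root rather than the original host path.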
- """ + return bindings fld: Arg - for fld in attrs_fields(self): + for fld in list_fields(self): if TypeParser.contains_type(FileSet, fld.type): - fileset: FileSet = self[fld.name] + fileset: FileSet | None = self[fld.name] + if fileset is None: + continue if not isinstance(fileset, FileSet): raise NotImplementedError( "Generating environment bindings for nested FileSets are not " @@ -1247,13 +1243,13 @@ def _prepare_bindings(self, root: str): host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") # Default to mounting paths as read-only, but respect existing modes - old_mode = self.bindings.get(host_path, ("", "ro"))[1] - self.bindings[host_path] = (env_path, "rw" if copy else old_mode) + bindings[host_path] = (env_path, "rw" if copy else "ro") # Provide in-container paths without type-checking - self.inputs_mod_root[fld.name] = tuple( + inputs_mod_root[fld.name] = tuple( env_path / rel for rel in fileset.relative_fspaths ) + return bindings, inputs_mod_root def _generated_output_names(self, stdout: str, stderr: str): """Returns a list of all outputs that will be generated by the task. From d4ce1d397e880b017de98ad21414eb4d9ac9904b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 14 Feb 2025 18:49:43 +1100 Subject: [PATCH 218/342] fixed up toy environments example, all docs should build now --- new-docs/source/tutorial/6-workflow.ipynb | 45 ++++++++++++++++------- new-docs/source/tutorial/tst.py | 41 +++++++++++++++------ pydra/engine/environments.py | 14 +++++-- pydra/engine/specs.py | 24 +++++++----- pydra/utils/typing.py | 5 +++ 5 files changed, 91 insertions(+), 38 deletions(-) diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index ffbee4d14e..df0790f8ff 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -435,6 +435,7 @@ "outputs": [], "source": [ "import tempfile\n", + "from pathlib import Path\n", "import numpy as np\n", "from fileformats.medimage import Nifti1\n", "import fileformats.medimage_mrtrix3 as mrtrix3\n", @@ -442,6 +443,28 @@ "from pydra.design import workflow, python\n", "from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold\n", "\n", + "MRTRIX2NUMPY_DTYPES = {\n", + " \"Int8\": np.dtype(\"i1\"),\n", + " \"UInt8\": np.dtype(\"u1\"),\n", + " \"Int16LE\": np.dtype(\"i2\"),\n", + " \"UInt16LE\": np.dtype(\"u2\"),\n", + " \"Int32LE\": np.dtype(\"i4\"),\n", + " \"UInt32LE\": np.dtype(\"u4\"),\n", + " \"Float32LE\": np.dtype(\"f4\"),\n", + " \"Float64LE\": np.dtype(\"f8\"),\n", + " \"CFloat32LE\": np.dtype(\"c8\"),\n", + " \"CFloat64LE\": np.dtype(\"c16\"),\n", + "}\n", + "\n", + "\n", "@workflow.define(outputs=[\"out_image\"])\n", "def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat:\n", " \"\"\"A toy example workflow that\n", @@ -457,23 +480,17 @@ " )\n", "\n", " @python.define\n", - " def SelectDataFile(in_file: mrtrix3.ImageHeader) -> mrtrix3.ImageDataFile:\n", - " return in_file.data_file\n", - "\n", - " select_data = workflow.add(SelectDataFile(in_file=input_conversion.out_file))\n", - "\n", - " @python.define\n", - " def Median(data_file: mrtrix3.ImageDataFile) -> float:\n", - " data = np.load(data_file)\n", + " def Median(mih: mrtrix3.ImageHeader) -> float:\n", + " \"\"\"A bespoke function that reads the separate data file in the MRTrix3 image\n", + " header format (i.e. 
.mih) and calculates the median value.\"\"\"\n", + " dtype = MRTRIX2NUMPY_DTYPES[mih.metadata[\"datatype\"].strip()]\n", + " data = np.frombuffer(Path.read_bytes(mih.data_file), dtype=dtype)\n", " return np.median(data)\n", "\n", - " median = workflow.add(Median(data_file=select_data.out))\n", + " median = workflow.add(Median(mih=input_conversion.out_file))\n", " threshold = workflow.add(\n", - " MrThreshold(\n", - " in_file=in_image,\n", - " abs=median.out\n", - " ), \n", - " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"\")\n", + " MrThreshold(in_file=in_image, out_file=\"binary.mif\", abs=median.out),\n", + " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", " )\n", "\n", " output_conversion = workflow.add(\n", diff --git a/new-docs/source/tutorial/tst.py b/new-docs/source/tutorial/tst.py index c894f26f7a..d84d9ff4d3 100644 --- a/new-docs/source/tutorial/tst.py +++ b/new-docs/source/tutorial/tst.py @@ -1,4 +1,5 @@ import tempfile +from pathlib import Path import numpy as np from fileformats.medimage import Nifti1 import fileformats.medimage_mrtrix3 as mrtrix3 @@ -6,6 +7,27 @@ from pydra.design import workflow, python from pydra.tasks.mrtrix3.v3_0 import MrConvert, MrThreshold +MRTRIX2NUMPY_DTYPES = { + "Int8": np.dtype("i1"), + "UInt8": np.dtype("u1"), + "Int16LE": np.dtype("i2"), + "UInt16LE": np.dtype("u2"), + "Int32LE": np.dtype("i4"), + "UInt32LE": np.dtype("u4"), + "Float32LE": np.dtype("f4"), + "Float64LE": np.dtype("f8"), + "CFloat32LE": np.dtype("c8"), + "CFloat64LE": np.dtype("c16"), +} + @workflow.define(outputs=["out_image"]) def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat: @@ -22,20 +44,17 @@ def ToyMedianThreshold(in_image: Nifti1) -> mrtrix3.ImageFormat: ) @python.define - def SelectDataFile(in_file: mrtrix3.ImageHeader) -> mrtrix3.ImageDataFile: - return in_file.data_file - - select_data = workflow.add(SelectDataFile(in_file=input_conversion.out_file)) - - @python.define - def Median(data_file: mrtrix3.ImageDataFile) -> float: - data = np.load(data_file) + def Median(mih: mrtrix3.ImageHeader) -> float: + """A bespoke function that reads the separate data file in the MRTrix3 image + header format (i.e. 
.mih) and calculates the median value.""" + dtype = MRTRIX2NUMPY_DTYPES[mih.metadata["datatype"].strip()] + data = np.frombuffer(Path.read_bytes(mih.data_file), dtype=dtype) return np.median(data) - median = workflow.add(Median(data_file=select_data.out)) + median = workflow.add(Median(mih=input_conversion.out_file)) threshold = workflow.add( - MrThreshold(in_file=in_image, abs=median.out), - environment=Docker("mrtrix3/mrtrix3", tag=""), + MrThreshold(in_file=in_image, out_file="binary.mif", abs=median.out), + environment=Docker("mrtrix3/mrtrix3", tag="latest"), ) output_conversion = workflow.add( diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 0a08f1453c..42833e8fd2 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -94,7 +94,7 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, inputs_mod_root = task.definition._get_bindings(root=self.root) + mounts, input_updates = task.definition._get_bindings(root=self.root) docker_args = [ "docker", @@ -112,7 +112,11 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] values = execute( - docker_args + [docker_img] + task.definition._command_args(root=self.root), + docker_args + + [docker_img] + + task.definition._command_args( + root=self.root, input_updates=input_updates + ), ) output = dict(zip(keys, values)) if output["return_code"]: @@ -129,7 +133,7 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, inputs_mod_root = task.definition._get_bindings(root=self.root) + mounts, input_updates = task.definition._get_bindings(root=self.root) # todo adding xargsy etc singularity_args = [ @@ -150,7 +154,9 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: values = execute( singularity_args + [singularity_img] - + task.definition._command_args(root=self.root), + + task.definition._command_args( + root=self.root, input_updates=input_updates + ), ) output = dict(zip(keys, values)) if output["return_code"]: diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 13a65b1bae..065dbd5a70 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -16,7 +16,7 @@ import attrs from attrs.converters import default_if_none import cloudpickle as cp -from fileformats.core import FileSet +from fileformats.generic import FileSet from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils.typing import TypeParser, is_optional, non_optional_type from .helpers import ( @@ -1224,7 +1224,7 @@ def _get_bindings( Mapping from paths in the host environment to the target environment """ bindings: dict[str, tuple[str, str]] = {} - inputs_mod_root: dict[str, tuple[Path, ...]] = {} + input_updates: dict[str, tuple[Path, ...]] = {} if root is None: return bindings fld: Arg @@ -1233,10 +1233,10 @@ def _get_bindings( fileset: FileSet | None = self[fld.name] if fileset is None: continue - if not isinstance(fileset, FileSet): + if not isinstance(fileset, (os.PathLike, FileSet)): raise NotImplementedError( - "Generating environment bindings for nested FileSets are not " - "yet supported" + "Generating environment bindings for nested FileSets is not " + "supported yet" ) copy = fld.copy_mode == FileSet.CopyMode.copy @@ -1245,11 +1245,17 @@ def _get_bindings( # Default to mounting 
paths as read-only, but respect existing modes bindings[host_path] = (env_path, "rw" if copy else "ro") - # Provide in-container paths without type-checking - inputs_mod_root[fld.name] = tuple( - env_path / rel for rel in fileset.relative_fspaths + # Provide updated in-container paths to the command to be run. If a + # fs-object, which resolves to a single path, just pass in the name of + # that path relative to the location in the mount point in the container. + # If it is a more complex file-set with multiple paths, then it is converted + # into a tuple of paths relative to the base of the fileset. + input_updates[fld.name] = ( + env_path / fileset.name + if isinstance(fileset, os.PathLike) + else tuple(env_path / rel for rel in fileset.relative_fspaths) ) - return bindings, inputs_mod_root + return bindings, input_updates def _generated_output_names(self, stdout: str, stderr: str): """Returns a list of all outputs that will be generated by the task. diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index fc0a1f3a44..7dfb2a69f1 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -261,6 +261,11 @@ def __call__(self, obj: ty.Any) -> T: try: coerced = self.coerce(obj) except TypeError as e: + if obj is None: + raise TypeError( + f"Mandatory field{self.label_str} of type {self.tp} was not " + "provided a value (i.e. a value that wasn't None) " + ) from None raise TypeError( f"Incorrect type for field{self.label_str}: {obj!r} is not of type " f"{self.tp} (and cannot be coerced to it)" From b55ef24760c86bc4c9c6ffbdd203d8b3646b29bd Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 09:50:00 +1100 Subject: [PATCH 219/342] fixed bug in multi-input typing --- pydra/utils/typing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 7dfb2a69f1..d2420785a6 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -345,7 +345,7 @@ def coerce_multi_input(obj, pattern_args): return coerce_sequence(list, obj, pattern_args) except TypeError as e1: try: - return [expand_and_coerce(obj, pattern_args)] + return [expand_and_coerce(obj, pattern_args[0])] except TypeError as e2: raise TypeError( f"Could not coerce object ({obj!r}) to MultiInputObj[{pattern_args[0]}] " From 1488ccc6055a426af6221f49eb531b3a1fdd3de7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 10:45:41 +1100 Subject: [PATCH 220/342] upped fileformats dep versions --- .github/workflows/docs.yml | 1 + .github/workflows/testdask.yml | 1 + .github/workflows/testpsijlocal.yml | 1 + .github/workflows/testpsijslurm.yml | 1 + .github/workflows/testpydra.yml | 1 + .github/workflows/testsingularity.yml | 1 + .github/workflows/testslurm.yml | 1 + pyproject.toml | 8 ++++---- 8 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 76c8da2ef2..64cea89122 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,6 +9,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml index 169e562fe6..0e9f3d447c 100644 --- a/.github/workflows/testdask.yml +++ b/.github/workflows/testdask.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/.github/workflows/testpsijlocal.yml b/.github/workflows/testpsijlocal.yml index 2e1a752ed2..d99966bf5e 100644 --- a/.github/workflows/testpsijlocal.yml +++ 
b/.github/workflows/testpsijlocal.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml index 9dc9100800..57d4e07781 100644 --- a/.github/workflows/testpsijslurm.yml +++ b/.github/workflows/testpsijslurm.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 79d9a4b10c..f9f7229a10 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: defaults: diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 97dd951aea..edcfb7c948 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index d067a5155f..62c1e43792 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -4,6 +4,7 @@ on: push: branches: - master + - develop pull_request: concurrency: diff --git a/pyproject.toml b/pyproject.toml index a79406f42c..3872118e2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ doc = [ "numpy", "scipy", "fileformats-extras >= v0.15.0a3", - "fileformats-medimage >= v0.10.0a", + "fileformats-medimage >= v0.10.0a2", "fileformats-medimage-extras >= v0.10.0a", "pydra-mrtrix3 >=3.0.4a14", "sphinx_rtd_theme", @@ -94,9 +94,9 @@ tutorial = [ "numpy", "pandas", "fileformats-extras >= v0.15.0a3", - "fileformats-medimage >= v0.10.0a", - "fileformats-medimage-extras >= v0.10.0a", - "pydra-mrtrix3 >=3.0.4a14", + "fileformats-medimage >= v0.10.0a2", + "fileformats-medimage-extras >= v0.10.0a2", + "pydra-mrtrix3 >=3.0.4a17", "psutil", "pytest", "scipy", From 377ab07727bf128d2bcc17e677ff5bc8980dcbba Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 10:57:20 +1100 Subject: [PATCH 221/342] debugging environments unittests --- pydra/engine/core.py | 4 ++ pydra/engine/environments.py | 66 ++++++++++++++++++++++++- pydra/engine/specs.py | 52 ------------------- pydra/engine/tests/test_environments.py | 54 +++++++++++++++----- pydra/engine/tests/test_workflow.py | 5 -- 5 files changed, 109 insertions(+), 72 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c49620e61b..2c169b3ffc 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -123,6 +123,10 @@ def __init__( if state_index is None: state_index = state.StateIndex() + if not isinstance(definition, TaskDef): + raise ValueError( + f"Task definition ({definition!r}) must be a TaskDef, not {type(definition)}" + ) # Check that the definition is fully resolved and ready to run definition._check_resolved() definition._check_rules() diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 42833e8fd2..0cec18895a 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -1,10 +1,16 @@ import typing as ty +import os from .helpers import execute from pathlib import Path +from fileformats.generic import FileSet +from pydra.engine.helpers import list_fields +from pydra.utils.typing import TypeParser + if ty.TYPE_CHECKING: from pydra.engine.core import Task from pydra.engine.specs import ShellDef + from pydra.design import shell class Environment: @@ -87,6 +93,58 @@ def bind(self, 
loc, mode="ro"): loc_abs = Path(loc).absolute() return f"{loc_abs}:{self.root}{loc_abs}:{mode}" + def _get_bindings( + self, definition: "ShellDef", root: str | None = None + ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: + """Return bindings necessary to run task in an alternative root. + + This is primarily intended for contexts when a task is going + to be run in a container with mounted volumes. + + Arguments + --------- + root: str, optional + + + Returns + ------- + bindings: dict + Mapping from paths in the host environment to the target environment + """ + bindings: dict[str, tuple[str, str]] = {} + input_updates: dict[str, tuple[Path, ...]] = {} + if root is None: + return bindings + fld: shell.arg + for fld in list_fields(definition): + if TypeParser.contains_type(FileSet, fld.type): + fileset: FileSet | None = definition[fld.name] + if fileset is None: + continue + if not isinstance(fileset, (os.PathLike, FileSet)): + raise NotImplementedError( + "Generating environment bindings for nested FileSets is not " + "supported yet" + ) + copy = fld.copy_mode == FileSet.CopyMode.copy + + host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") + + # Default to mounting paths as read-only, but respect existing modes + bindings[host_path] = (env_path, "rw" if copy else "ro") + + # Provide updated in-container paths to the command to be run. If a + # fs-object, which resolves to a single path, just pass in the name of + # that path relative to the location in the mount point in the container. + # If it is a more complex file-set with multiple paths, then it is converted + # into a tuple of paths relative to the base of the fileset. + input_updates[fld.name] = ( + env_path / fileset.name + if isinstance(fileset, os.PathLike) + else tuple(env_path / rel for rel in fileset.relative_fspaths) + ) + return bindings, input_updates + class Docker(Container): """Docker environment.""" @@ -94,7 +152,9 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = task.definition._get_bindings(root=self.root) + mounts, input_updates = self._get_bindings( + definition=task.definition, root=self.root + ) docker_args = [ "docker", @@ -133,7 +193,9 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = task.definition._get_bindings(root=self.root) + mounts, input_updates = self._get_bindings( + definition=task.definition, root=self.root + ) # todo adding xargsy etc singularity_args = [ diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 065dbd5a70..27969fd587 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1205,58 +1205,6 @@ def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: cmd_el_str = "" return split_cmd(cmd_el_str) - def _get_bindings( - self, root: str | None = None - ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: - """Return bindings necessary to run task in an alternative root. - - This is primarily intended for contexts when a task is going - to be run in a container with mounted volumes. 
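On the user side this machinery stays hidden: a container environment is simply attached to a node, and the mounts are derived from whichever file inputs the definition holds. A minimal sketch mirroring the ToyMedianThreshold example above (assumes a local Docker install with the mrtrix3/mrtrix3 image; the workflow and output names here are illustrative only):

import fileformats.medimage_mrtrix3 as mrtrix3
from fileformats.medimage import Nifti1
from pydra.engine.environments import Docker
from pydra.design import workflow
from pydra.tasks.mrtrix3.v3_0 import MrConvert

@workflow.define(outputs=["converted"])
def ConvertInContainer(in_image: Nifti1) -> mrtrix3.ImageFormat:
    # The node's file inputs are bind-mounted into the container by the Docker
    # environment's _get_bindings, and the command receives the translated
    # in-container paths via input_updates.
    mrconvert = workflow.add(
        MrConvert(in_file=in_image, out_file="out.mif"),
        environment=Docker("mrtrix3/mrtrix3", tag="latest"),
    )
    return mrconvert.out_file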
- - Arguments - --------- - root: str, optional - - - Returns - ------- - bindings: dict - Mapping from paths in the host environment to the target environment - """ - bindings: dict[str, tuple[str, str]] = {} - input_updates: dict[str, tuple[Path, ...]] = {} - if root is None: - return bindings - fld: Arg - for fld in list_fields(self): - if TypeParser.contains_type(FileSet, fld.type): - fileset: FileSet | None = self[fld.name] - if fileset is None: - continue - if not isinstance(fileset, (os.PathLike, FileSet)): - raise NotImplementedError( - "Generating environment bindings for nested FileSets is not " - "supported yet" - ) - copy = fld.copy_mode == FileSet.CopyMode.copy - - host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") - - # Default to mounting paths as read-only, but respect existing modes - bindings[host_path] = (env_path, "rw" if copy else "ro") - - # Provide updated in-container paths to the command to be run. If a - # fs-object, which resolves to a single path, just pass in the name of - # that path relative to the location in the mount point in the container. - # If it is a more complex file-set with multiple paths, then it is converted - # into a tuple of paths relative to the base of the fileset. - input_updates[fld.name] = ( - env_path / fileset.name - if isinstance(fileset, os.PathLike) - else tuple(env_path / rel for rel in fileset.relative_fspaths) - ) - return bindings, input_updates - def _generated_output_names(self, stdout: str, stderr: str): """Returns a list of all outputs that will be generated by the task. Takes into account the task input and the requires list for the output fields. diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 9555b04e64..fd36afaf8a 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -50,12 +50,19 @@ def newcache(x): @need_docker def test_docker_1(tmp_path): """docker env: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) + + def newcache(x): + makedir(tmp_path, x) cmd = ["whoami"] docker = Docker(image="busybox") - shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) - assert shelly.cmdline == " ".join(cmd) + shell_def = shell.define(cmd) + shelly = Task( + definition=shell_def, + submitter=Submitter(cache_dir=newcache("shelly")), + name="shelly", + ) + assert shell_def.cmdline == " ".join(cmd) env_res = docker.execute(shelly) shelly_env = ShellDef( @@ -86,12 +93,19 @@ def test_docker_1(tmp_path): ) def test_docker_1_subm(tmp_path, docker): """docker env with submitter: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) - cmd = ["whoami"] + def newcache(x): + makedir(tmp_path, x) + + cmd = "whoami" docker = Docker(image="busybox") - shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) - assert shelly.cmdline == " ".join(cmd) + shell_def = shell.define(cmd)() + shelly = Task( + definition=shell_def, + submitter=Submitter(cache_dir=newcache("shelly")), + name="shelly", + ) + assert shell_def.cmdline == cmd env_res = docker.execute(shelly) shelly_env = ShellDef( @@ -116,12 +130,19 @@ def test_docker_1_subm(tmp_path, docker): @need_singularity def test_singularity_1(tmp_path): """singularity env: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) + + def newcache(x): + makedir(tmp_path, x) cmd = ["whoami"] sing = Singularity(image="docker://alpine") - shelly = ShellDef(name="shelly", 
executable=cmd, cache_dir=newcache("shelly")) - assert shelly.cmdline == " ".join(cmd) + shell_def = shell.define(cmd) + shelly = Task( + definition=shell_def, + submitter=Submitter(cache_dir=newcache("shelly")), + name="shelly", + ) + assert shell_def.cmdline == " ".join(cmd) env_res = sing.execute(shelly) shelly_env = ShellDef( @@ -144,12 +165,19 @@ def test_singularity_1(tmp_path): @need_singularity def test_singularity_1_subm(tmp_path, plugin): """docker env with submitter: simple command, no arguments""" - newcache = lambda x: makedir(tmp_path, x) + + def newcache(x): + makedir(tmp_path, x) cmd = ["whoami"] sing = Singularity(image="docker://alpine") - shelly = ShellDef(name="shelly", executable=cmd, cache_dir=newcache("shelly")) - assert shelly.cmdline == " ".join(cmd) + shell_def = shell.define(cmd) + shelly = Task( + definition=shell_def, + submitter=Submitter(cache_dir=newcache("shelly")), + name="shelly", + ) + assert shell_def.cmdline == " ".join(cmd) env_res = sing.execute(shelly) shelly_env = ShellDef( diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 3cf124defa..1478c417a7 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -39,11 +39,6 @@ from pydra.utils import exc_info_matches -def test_wf_no_input_spec(): - with pytest.raises(ValueError, match='Empty "Inputs" definition'): - Workflow(name="workflow") - - def test_wf_specinfo_input_spec(): input_spec = SpecInfo( name="Input", From dddf8a25035ce65d4b5c7715978d988729d6454b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 11:06:09 +1100 Subject: [PATCH 222/342] upped requirement on pydra-mrtrix to 3.0.4a17 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3872118e2a..8f7b31323e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ doc = [ "fileformats-extras >= v0.15.0a3", "fileformats-medimage >= v0.10.0a2", "fileformats-medimage-extras >= v0.10.0a", - "pydra-mrtrix3 >=3.0.4a14", + "pydra-mrtrix3 >=3.0.4a17", "sphinx_rtd_theme", "sphinx-click", "sphinxcontrib-apidoc ~=0.3.0", From b9221d723fb57ba1e3035d02425e87b17ff7977c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 11:09:30 +1100 Subject: [PATCH 223/342] more dep version changes --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f7b31323e..0350deab85 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ doc = [ "scipy", "fileformats-extras >= v0.15.0a3", "fileformats-medimage >= v0.10.0a2", - "fileformats-medimage-extras >= v0.10.0a", + "fileformats-medimage-extras >= v0.10.0a2", "pydra-mrtrix3 >=3.0.4a17", "sphinx_rtd_theme", "sphinx-click", From c8e25b1f44511c46e49a182bc5e31289e02f735d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 12:04:10 +1100 Subject: [PATCH 224/342] cleaned up deps and added ipywidgets to docs build --- .github/workflows/docs.yml | 8 ++++---- pyproject.toml | 37 +++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 64cea89122..6af95b988c 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,6 +52,10 @@ jobs: run: shell: bash -l {0} steps: + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install pandoc libnotify-bin - uses: actions/checkout@v4 - name: Fetch tags run: git fetch 
--prune --unshallow @@ -68,10 +72,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.x' - - name: Install Pandoc - run: | - sudo apt-get update - sudo apt-get install pandoc - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/pyproject.toml b/pyproject.toml index 0350deab85..056bbb3477 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,28 +41,30 @@ psij = ["psij-python"] dask = ["dask", "distributed"] dev = ["black", "pre-commit", "pydra[test]"] doc = [ - "packaging", - "sphinx ==6.2.1", - "sphinx-argparse", - "nbsphinx", + "fileformats-extras >= v0.15.0a3", + "fileformats-medimage >= v0.10.0a2", + "fileformats-medimage-extras >= v0.10.0a2", + "furo>=2022.2.14.1", "ipython", "ipykernel", + "ipywidgets", + "nbsphinx", + "nest_asyncio", "nibabel", "nilearn", + "numpy", + "numpydoc>=0.6.0", + "packaging", "pandas", "pandoc", - "numpy", - "scipy", - "fileformats-extras >= v0.15.0a3", - "fileformats-medimage >= v0.10.0a2", - "fileformats-medimage-extras >= v0.10.0a2", "pydra-mrtrix3 >=3.0.4a17", - "sphinx_rtd_theme", + "scipy", + "sphinx ==6.2.1", + "sphinx-argparse", "sphinx-click", + "sphinx_rtd_theme", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", - "furo>=2022.2.14.1", - "numpydoc>=0.6.0", ] test = [ "pytest >=6.2.5", @@ -81,24 +83,23 @@ test = [ "pympler", ] tutorial = [ + "fileformats-extras >= v0.15.0a3", + "fileformats-medimage >= v0.10.0a2", + "fileformats-medimage-extras >= v0.10.0a2", "jupyter", "jupyter_contrib_nbextensions", "jupytext", "jupyterlab", "matplotlib", - "nest_asyncio", "nbformat", "nbval", + "nest_asyncio", "nibabel", "nilearn", "numpy", "pandas", - "fileformats-extras >= v0.15.0a3", - "fileformats-medimage >= v0.10.0a2", - "fileformats-medimage-extras >= v0.10.0a2", - "pydra-mrtrix3 >=3.0.4a17", "psutil", - "pytest", + "pydra-mrtrix3 >=3.0.4a17", "scipy", "sh", ] From dc9e63831be1ebf7261eae949a3060e0d2de3338 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 12:15:36 +1100 Subject: [PATCH 225/342] upped fileformats dep version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 056bbb3477..4e1eb23cc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.15a3", + "fileformats >=0.15a5", "platformdirs >=2", ] license = { file = "LICENSE" } @@ -41,7 +41,7 @@ psij = ["psij-python"] dask = ["dask", "distributed"] dev = ["black", "pre-commit", "pydra[test]"] doc = [ - "fileformats-extras >= v0.15.0a3", + "fileformats-extras >= v0.15.0a5", "fileformats-medimage >= v0.10.0a2", "fileformats-medimage-extras >= v0.10.0a2", "furo>=2022.2.14.1", From 799a3848f622725c7cf1c7579ed9f52340c9bfa4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 12:24:34 +1100 Subject: [PATCH 226/342] added favicon to conf.py --- new-docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py index abef948238..0586c6374a 100644 --- a/new-docs/source/conf.py +++ b/new-docs/source/conf.py @@ -167,7 +167,7 @@ # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -# html_favicon = None +html_favicon = "_static/logo/pydra_logo.png" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, From 8c62ea78792072e3186b0593620978c4956e3649 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 12:28:05 +1100 Subject: [PATCH 227/342] relaxed fileformats dep version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4e1eb23cc4..81be96dc6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", - "fileformats >=0.15a5", + "fileformats >=0.15a4", "platformdirs >=2", ] license = { file = "LICENSE" } @@ -41,7 +41,7 @@ psij = ["psij-python"] dask = ["dask", "distributed"] dev = ["black", "pre-commit", "pydra[test]"] doc = [ - "fileformats-extras >= v0.15.0a5", + "fileformats-extras >= v0.15.0a4", "fileformats-medimage >= v0.10.0a2", "fileformats-medimage-extras >= v0.10.0a2", "furo>=2022.2.14.1", From bbdf072ceefd208f17e62adde020adf81a3de0e1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 13:04:17 +1100 Subject: [PATCH 228/342] setup virtual desktop to accept notifications --- .github/workflows/docs.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 6af95b988c..8bb6c090d8 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -55,7 +55,11 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install pandoc libnotify-bin + sudo apt-get install -y pandoc xvfb libnotify-bin + - name: Set up XVFB to handle desktp notifications + run: | + Xvfb :99 & + export DISPLAY=:99 - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow From 8f1623c6ee00e2f91c8c57d8edec1518aee9e9b0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 13:20:25 +1100 Subject: [PATCH 229/342] trying to get desktop notifications to work --- .github/workflows/docs.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 8bb6c090d8..29602bfd08 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -55,11 +55,14 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y pandoc xvfb libnotify-bin - - name: Set up XVFB to handle desktp notifications + sudo apt-get install -y pandoc xvfb libnotify-bin dunst dbus-x11 + - name: Set up OS to handle desktop notifications (used in adv.-exec. tutorial) run: | Xvfb :99 & export DISPLAY=:99 + eval "$(dbus-launch --sh-syntax)" + dunst & + notify-send "Test Notification" - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow From 76feb6914760d1360bf4ef3f69573d16a12f9cdb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 13:33:01 +1100 Subject: [PATCH 230/342] added additional comment to toymedianThreshold example --- new-docs/source/tutorial/6-workflow.ipynb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/new-docs/source/tutorial/6-workflow.ipynb b/new-docs/source/tutorial/6-workflow.ipynb index df0790f8ff..ecb8cd9d87 100644 --- a/new-docs/source/tutorial/6-workflow.ipynb +++ b/new-docs/source/tutorial/6-workflow.ipynb @@ -482,12 +482,17 @@ " @python.define\n", " def Median(mih: mrtrix3.ImageHeader) -> float:\n", " \"\"\"A bespoke function that reads the separate data file in the MRTrix3 image\n", - " header format (i.e. 
.mih) and calculates the median value.\"\"\"\n", + " header format (i.e. .mih) and calculates the median value.\n", + "\n", + " NB: We could use a MrStats task here, but this is just an example to show how\n", + " to use a bespoke function in a workflow.\n", + " \"\"\"\n", " dtype = MRTRIX2NUMPY_DTYPES[mih.metadata[\"datatype\"].strip()]\n", " data = np.frombuffer(Path.read_bytes(mih.data_file), dtype=dtype)\n", " return np.median(data)\n", "\n", " median = workflow.add(Median(mih=input_conversion.out_file))\n", + "\n", " threshold = workflow.add(\n", " MrThreshold(in_file=in_image, out_file=\"binary.mif\", abs=median.out),\n", " environment=Docker(\"mrtrix3/mrtrix3\", tag=\"latest\"),\n", From de837272231cc27a7844a37795a30a1abf4a85cb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 13:33:14 +1100 Subject: [PATCH 231/342] attempting to get virtual desktop notifications to work --- .github/workflows/docs.yml | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 29602bfd08..e60de835e6 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -52,17 +52,34 @@ jobs: run: shell: bash -l {0} steps: - - name: Install system dependencies + - name: Install Pandoc for NBSphinx run: | sudo apt-get update - sudo apt-get install -y pandoc xvfb libnotify-bin dunst dbus-x11 - - name: Set up OS to handle desktop notifications (used in adv.-exec. tutorial) + sudo apt-get install -y pandoc + - name: Install Dependencies for virtual notifications in Adv.-Exec Tutorial + run: | + sudo apt update + sudo apt install -y xvfb libnotify-bin dbus-x11 xfce4-notifyd + - name: Start Virtual Display (for notifications) run: | Xvfb :99 & export DISPLAY=:99 eval "$(dbus-launch --sh-syntax)" - dunst & - notify-send "Test Notification" + echo "DISPLAY=:99" >> $GITHUB_ENV + echo "DBUS_SESSION_BUS_ADDRESS=$DBUS_SESSION_BUS_ADDRESS" >> $GITHUB_ENV + - name: Start Notification Daemon (for notifications) + run: | + xfce4-notifyd & + sleep 2 # Give it some time to start + - name: Send Notification (test notifications) + run: | + notify-send "GitHub Runner Notification" "This is a test notification from GitHub Actions" + - name: Debug Running Processes (for notifications) + run: | + ps aux | grep notify + ps aux | grep xfce4-notifyd + dbus-monitor --session & + sleep 3 - uses: actions/checkout@v4 - name: Fetch tags run: git fetch --prune --unshallow From 8bae6d327a5bb97d5442d1f53cb46b4fd4071ccb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 14:20:49 +1100 Subject: [PATCH 232/342] nbsphinx errors will fail docs build --- new-docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/new-docs/source/conf.py b/new-docs/source/conf.py index 0586c6374a..683a83a0ba 100644 --- a/new-docs/source/conf.py +++ b/new-docs/source/conf.py @@ -50,7 +50,7 @@ ] -nbsphinx_allow_errors = True +nbsphinx_allow_errors = False # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] From e99f083964a1b54e6378b270c6d2349328d925c6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:01:40 +1100 Subject: [PATCH 233/342] dropped 2nd level GLM as too complex for now --- .../{first-level-glm.ipynb => glm.ipynb} | 26 +- new-docs/source/examples/two-level-glm.ipynb | 1207 ----------------- new-docs/source/index.rst | 6 +- pyproject.toml | 2 + 4 files changed, 24 insertions(+), 1217 deletions(-) rename new-docs/source/examples/{first-level-glm.ipynb => glm.ipynb} (97%) delete mode 100644 new-docs/source/examples/two-level-glm.ipynb diff --git a/new-docs/source/examples/first-level-glm.ipynb b/new-docs/source/examples/glm.ipynb similarity index 97% rename from new-docs/source/examples/first-level-glm.ipynb rename to new-docs/source/examples/glm.ipynb index 914ac6b9e3..e82b384686 100644 --- a/new-docs/source/examples/first-level-glm.ipynb +++ b/new-docs/source/examples/glm.ipynb @@ -62,11 +62,13 @@ "from scipy.stats import norm\n", "\n", "import nibabel as nib\n", - "from nilearn.datasets import (\n", - " fetch_openneuro_dataset_index,\n", - " fetch_openneuro_dataset,\n", - " select_from_index,\n", - " )\n", + "# These functions were removed within nilearn, so this notebook needs to be rewritten\n", + "# to use the 'openneuro' module instead\n", + "# from nilearn.datasets import (\n", + "# fetch_openneuro_dataset_index,\n", + "# fetch_openneuro_dataset,\n", + "# select_from_index,\n", + "# )\n", "from nilearn.interfaces.fsl import get_design_from_fslmat\n", "from nilearn.glm.first_level import first_level_from_bids\n", "from nilearn.reporting import get_clusters_table, make_glm_report\n", @@ -750,9 +752,21 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "wf12", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" } }, "nbformat": 4, diff --git a/new-docs/source/examples/two-level-glm.ipynb b/new-docs/source/examples/two-level-glm.ipynb deleted file mode 100644 index 4285234d37..0000000000 --- a/new-docs/source/examples/two-level-glm.ipynb +++ /dev/null @@ -1,1207 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "058a991d", - "metadata": {}, - "source": [ - "# Two-Level GLM" - ] - }, - { - "cell_type": "markdown", - "id": "2b4f98e0", - "metadata": {}, - "source": [ - "In this tutorial, which is adapted from the Nilearn docs, we demonstrate how to write pydra tasks for the first level (subject-level) GLM and the second level (group-level) GLM in Nilearn. We use the data from [Balloon Analog Risk-taking Task](https://openneuro.org/datasets/ds000001/versions/1.0.0). 
\n", - "Basic information about this dataset:\n", - "- 16 subjects\n", - "- 3 runs\n", - "- functional scan TR: 2.3 \n", - "- num of functional scan: 300" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b047e37b", - "metadata": {}, - "outputs": [], - "source": [ - "import nest_asyncio\n", - "nest_asyncio.apply()" - ] - }, - { - "cell_type": "markdown", - "id": "897522ee", - "metadata": {}, - "source": [ - "## Preparation\n", - "\n", - "Import packages that will be used globally and set up output directory" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9c4743db", - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "import sys \n", - "if not sys.warnoptions:\n", - " warnings.simplefilter(\"ignore\")\n", - "\n", - "import os\n", - "import glob\n", - "import datetime\n", - "import random\n", - "from pydra.design import python, workflow\n", - "import typing as ty\n", - "from pathlib import Path\n", - "import datalad.api as dl\n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import nibabel as nib\n", - "from scipy.stats import norm\n", - "from nilearn.interfaces.fmriprep import load_confounds_strategy\n", - "from nilearn.image import load_img, get_data, math_img, threshold_img\n", - "from nilearn.glm.first_level import make_first_level_design_matrix, FirstLevelModel\n", - "from nilearn.glm.second_level import SecondLevelModel, non_parametric_inference\n", - "from nilearn.glm.contrasts import compute_fixed_effects\n", - "from nilearn.plotting import plot_stat_map, plot_glass_brain" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "270ec541", - "metadata": {}, - "outputs": [], - "source": [ - "# get current directory\n", - "pydra_tutorial_dir = os.path.dirname(os.getcwd())\n", - "\n", - "# set up output directory\n", - "workflow_dir = Path(pydra_tutorial_dir) / 'outputs'\n", - "workflow_out_dir = workflow_dir / '9_glm' /'results'\n", - "\n", - "# create folders if not exit\n", - "os.makedirs(workflow_out_dir, exist_ok=True)" - ] - }, - { - "cell_type": "markdown", - "id": "55fad822", - "metadata": {}, - "source": [ - "### Download the data\n", - "\n", - "[DataLad](http://handbook.datalad.org/en/latest/index.htmlhttp://handbook.datalad.org/en/latest/index.html) is often used in those cases to download data. Here we use its [Python API](http://docs.datalad.org/en/latest/modref.htmlhttp://docs.datalad.org/en/latest/modref.html).\n", - "\n", - "We need the following data: \n", - "\n", - "1. event information (raw data)\n", - "2. preprocessed image data (fmriprep)\n", - "3. masks (fmriprep)\n", - "4. 
confounds (fmriprep)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a99bf091", - "metadata": { - "tags": [ - "remove-output" - ] - }, - "outputs": [], - "source": [ - "fmriprep_path = workflow_dir / '7_glm'/ 'data'\n", - "rawdata_path = workflow_dir / '7_glm' / 'raw_data'\n", - "os.makedirs(fmriprep_path, exist_ok=True)\n", - "os.makedirs(rawdata_path, exist_ok=True)\n", - "# Install datasets to specific datapaths\n", - "fmriprep_url = 'https://github.com/OpenNeuroDerivatives/ds000001-fmriprep.git'\n", - "rawdata_url = 'https://github.com/OpenNeuroDatasets/ds000001.git'\n", - "dl.install(source=rawdata_url, path=rawdata_path)\n", - "dl.install(source=fmriprep_url, path=fmriprep_path)" - ] - }, - { - "cell_type": "markdown", - "id": "67484d6a", - "metadata": {}, - "source": [ - "### Get data for each subject\n", - "\n", - "By `datalad.api.install`, datalad downloads all symlinks without storing the actual data locally. We can then use `datalad.api.get` to get the data we need for our analysis. \n", - "We need to get four types of data from two folders:\n", - "\n", - "1. event_info: `*events.tsv` from `rawdata_path`\n", - "2. bold: `*space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz` from `fmriprep_path`\n", - "3. mask: `*space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz` from `fmriprep_path`\n", - "4. confounds: `*desc-confounds_timeseries.tsv` from `fmriprep_path` (this is implicitly needed by `load_confounds_strategy`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d1fbfa3", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "@python.define(outputs=['subj_id','subj_events', 'subj_imgs', 'subj_masks'])\n", - "def GetSubjData(subj_id: int) -> tuple[int, list, list, list]:\n", - " print(f\"\\nDownload data for subject-{subj_id}\")\n", - " # get events.tsv \n", - " subj_events = glob.glob(os.path.join(rawdata_path, 'sub-%02d' % subj_id, 'func', '*events.tsv'))\n", - " subj_events.sort()\n", - " for i in subj_events:\n", - " dl.get(i, dataset=rawdata_path)\n", - " # get bold\n", - " subj_imgs = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*space-MNI152NLin2009cAsym_res-2_desc-preproc_bold.nii.gz'))\n", - " subj_imgs.sort()\n", - " for i in subj_imgs:\n", - " dl.get(i, dataset=fmriprep_path)\n", - " # get mask\n", - " subj_masks = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*space-MNI152NLin2009cAsym_res-2_desc-brain_mask.nii.gz'))\n", - " subj_masks.sort()\n", - " for i in subj_masks:\n", - " dl.get(i, dataset=fmriprep_path)\n", - " # get confounds list\n", - " subj_confounds = glob.glob(os.path.join(fmriprep_path, 'sub-%02d' % subj_id, 'func', '*desc-confounds_timeseries.tsv'))\n", - " subj_confounds.sort()\n", - " for i in subj_confounds:\n", - " dl.get(i, dataset=fmriprep_path)\n", - " return subj_id, subj_events, subj_imgs, subj_masks" - ] - }, - { - "cell_type": "markdown", - "id": "ef024e95", - "metadata": {}, - "source": [ - "## First-Level GLM\n", - "\n", - "The first level GLM has two parts:\n", - "- conduct GLM for each run on every subject\n", - "- average across runs for each subject with a fixed-effect model" - ] - }, - { - "cell_type": "markdown", - "id": "4c652b28", - "metadata": {}, - "source": [ - "### Get the first-level design matrix\n", - "\n", - "The design matrix is a _M(row)_ x _N(columns)_ matrix. _M_ corresponds to the number of _tr_, while _N_ corresponds to event conditions + confounds." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "83b52e93", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'dm_path': str, 'run_id': int})\n", - "def GetFirstLevelDm(tr: float, n_scans: int, hrf_model: str, subj_id: int, run_id: int, subj_imgs: list, subj_events: list):\n", - " print(f\"\\nGet subject-{subj_id}, run-{run_id} firstlevel GLM design matrix...\\n\")\n", - " # read event file\n", - " run_img = subj_imgs[run_id-1]\n", - " run_event = subj_events[run_id-1]\n", - " event = pd.read_csv(run_event, sep='\\t').fillna(0)\n", - " event = event[['onset', 'duration', 'trial_type']]\n", - " # get list of confounds directly from fmriprepped bold\n", - " confounds = load_confounds_strategy(run_img, denoise_strategy='simple')[0]\n", - " frame_times = np.arange(n_scans) * tr\n", - " design_matrix = make_first_level_design_matrix(frame_times, event, \n", - " hrf_model=hrf_model,\n", - " add_regs=confounds) \n", - "\n", - " # make sure all design matrices have the same length of column\n", - " # if you have a block design, this is not needed.\n", - " # 39 = 4(events) + 34(confounds) + 13(drift) + 1(constant)\n", - " assert design_matrix.shape[1] == 52, \"This design matrix has the wrong column number\"\n", - " # sort the column order alphabetical for contrasts\n", - " design_matrix = design_matrix.reindex(sorted(design_matrix.columns), axis=1)\n", - " dm_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_designmatrix.csv' % (subj_id, run_id))\n", - " design_matrix.to_csv(dm_path, index=None)\n", - " return dm_path, run_id" - ] - }, - { - "cell_type": "markdown", - "id": "b9d8c639", - "metadata": {}, - "source": [ - "### Set up the first level contrasts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "02c14978", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'contrasts': dict})\n", - "def SetContrast(subj_id: int, run_id: int, dm_path: str):\n", - " print(f\"\\nSet firstlevel contrast for subject-{subj_id}, run-{run_id} ...\\n\") \n", - " design_matrix = pd.read_csv(dm_path)\n", - " contrast_matrix = np.eye(design_matrix.shape[1])\n", - " basic_contrasts = dict([(column, contrast_matrix[i])\n", - " for i, column in enumerate(design_matrix.columns)])\n", - " contrasts = {\n", - " 'pumps-control': basic_contrasts['pumps_demean'] - basic_contrasts['control_pumps_demean'],\n", - " 'control-pumps': -basic_contrasts['control_pumps_demean'] + basic_contrasts['pumps_demean'],\n", - " 'pumps-baseline': basic_contrasts['pumps_demean'],\n", - " 'cash-baseline': basic_contrasts['cash_demean'],\n", - " 'explode-baseline': basic_contrasts['explode_demean']\n", - " }\n", - " return contrasts" - ] - }, - { - "cell_type": "markdown", - "id": "c3842a52", - "metadata": {}, - "source": [ - "### Fit the first level GLM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97c51941", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'effect_size_path_dict': dict, 'effect_variance_path_dict': dict})\n", - "def FirstLevelEstimation(\n", - " subj_id: int,\n", - " run_id: int,\n", - " subj_imgs: list,\n", - " subj_masks: list,\n", - " smoothing_fwhm:float,\n", - " dm_path: str,\n", - " contrasts: dict\n", - "):\n", - " print(f\"\\nStart firstlevel estimation for subject-{subj_id}, run-{run_id} ...\\n\")\n", - " \n", - " # subsample img to reduce memory\n", - " run_img = subj_imgs[run_id-1]\n", - " img = load_img(run_img)\n", - " img_data = 
get_data(run_img)[::2,::2,::2]\n", - " new_img = nib.Nifti1Image(img_data, img.affine)\n", - " run_mask = subj_masks[run_id-1]\n", - " print('Fit the firstlevel model...')\n", - " first_level_model = FirstLevelModel(mask_img=run_mask, smoothing_fwhm=smoothing_fwhm)\n", - " dm= pd.read_csv(dm_path)\n", - " first_level_model = first_level_model.fit(new_img, design_matrices=dm)\n", - " print('Computing contrasts...')\n", - " effect_size_path_dict = dict.fromkeys(contrasts.keys())\n", - " effect_variance_path_dict = dict.fromkeys(contrasts.keys())\n", - " for index, (contrast_id, contrast_val) in enumerate(contrasts.items()):\n", - " print(' Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(contrasts), contrast_id))\n", - " # Estimate the contasts. Note that the model implicitly computes a fixed\n", - " # effect across the two sessions\n", - " res = first_level_model.compute_contrast(contrast_val, output_type='all')\n", - " # write the resulting stat images to file\n", - " effect_size_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_contrast-%s_effect_size.nii.gz' % (subj_id, run_id, contrast_id))\n", - " effect_variance_path = os.path.join(workflow_out_dir, 'sub-%s_run-%s_contrast-%s_effect_varaince.nii.gz' % (subj_id, run_id, contrast_id))\n", - " effect_size_path_dict[contrast_id] = effect_size_path\n", - " effect_variance_path_dict[contrast_id] = effect_variance_path\n", - " res['effect_size'].to_filename(effect_size_path)\n", - " res['effect_variance'].to_filename(effect_variance_path)\n", - " \n", - " return effect_size_path_dict, effect_variance_path_dict" - ] - }, - { - "cell_type": "markdown", - "id": "1565da8f", - "metadata": {}, - "source": [ - "### Create the first level GLM workflow\n", - "\n", - "This workflow include GLM for each run." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "202433ba", - "metadata": {}, - "outputs": [], - "source": [ - "# initiate the first-level GLM workflow\n", - "@workflow.define(outputs=[\n", - " 'first_level_contrast',\n", - " 'first_level_effect_size_list',\n", - " 'first_level_effect_variance_list'\n", - "])\n", - "def GLMWorkflow(\n", - " subj_id,\n", - " run_id,\n", - " subj_imgs,\n", - " subj_events,\n", - " subj_masks,\n", - " tr,\n", - " n_scans,\n", - " hrf_model,\n", - " smoothing_fwhm\n", - "):\n", - " \n", - " # add task - get_firstlevel_dm\n", - " get_firstlevel_dm = workflow.add(\n", - " GetFirstLevelDm(\n", - " tr = tr, \n", - " n_scans = n_scans, \n", - " hrf_model = hrf_model, \n", - " subj_id = subj_id, \n", - " run_id = run_id, \n", - " subj_imgs = subj_imgs, \n", - " subj_events = subj_events,\n", - " )\n", - " )\n", - "\n", - " # add task - set_contrast\n", - " set_contrast = workflow.add(\n", - " SetContrast(\n", - " subj_id = subj_id,\n", - " run_id = get_firstlevel_dm.run_id,\n", - " dm_path = get_firstlevel_dm.dm_path\n", - " )\n", - " )\n", - "\n", - " # add task - firstlevel_estimation\n", - " firstlevel_estimation = workflow.add(\n", - " FirstLevelEstimation(\n", - " subj_id = subj_id, \n", - " run_id = get_firstlevel_dm.run_id, \n", - " subj_imgs = subj_imgs, \n", - " subj_masks = subj_masks,\n", - " smoothing_fwhm = smoothing_fwhm, \n", - " dm_path = get_firstlevel_dm.dm_path, \n", - " contrasts = set_contrast.contrasts\n", - " )\n", - " )\n", - "\n", - " # specify output\n", - " return (set_contrast.contrasts,\n", - " firstlevel_estimation.effect_size_path_dict,\n", - " firstlevel_estimation.effect_variance_path_dict,\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "db735ca0", - "metadata": {}, - "source": [ - "### Compute fixed effects\n", - "\n", - "Before we move to the second(group) level, we need to average results from all three runs from a fixed effect model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0e42da7a", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(\n", - " outputs={'fixed_fx_contrast_path_dict': dict, 'fixed_fx_variance_path_dict': dict, 'fixed_fx_ttest_path_dict': dict},\n", - ")\n", - "def GetFixedEffcts(\n", - " subj_id: int,\n", - " subj_masks: list,\n", - " contrasts: list,\n", - " effect_size_path_dict_list: list,\n", - " effect_variance_path_dict_list: list\n", - "):\n", - " print(f\"contrasts: {contrasts}\")\n", - " print(f'Compute fixed effects for subject-{subj_id}...')\n", - " # average mask across three runs\n", - " mean_mask = math_img('np.mean(img, axis=-1)', img=subj_masks)\n", - " # binarize the mean mask\n", - " mask = math_img('img > 0', img=mean_mask)\n", - " fixed_fx_contrast_path_dict =dict.fromkeys(contrasts[0].keys())\n", - " fixed_fx_variance_path_dict = dict.fromkeys(contrasts[0].keys())\n", - " fixed_fx_ttest_path_dict = dict.fromkeys(contrasts[0].keys())\n", - " for index, (contrast_id, contrast_val) in enumerate(contrasts[0].items()):\n", - " print(' Contrast % 2i out of %i: %s' % (index + 1, len(contrasts[0]), contrast_id))\n", - " contrast_imgs = [nib.load(img_dict[contrast_id]) for img_dict in effect_size_path_dict_list]\n", - " variance_imgs = [nib.load(img_dict[contrast_id]) for img_dict in effect_variance_path_dict_list]\n", - " fixed_fx_contrast, fixed_fx_variance, fixed_fx_ttest = compute_fixed_effects(contrast_imgs, variance_imgs, mask)\n", - " \n", - " effect_size_path = os.path.join(workflow_out_dir, 
'sub-%s_contrast-%s_fx_effect_size.nii.gz' % (subj_id, contrast_id))\n", - " variance_path = os.path.join(workflow_out_dir, 'sub-%s_contrast-%s_fx_effect_varaince.nii.gz' % (subj_id, contrast_id))\n", - " ttest_path = os.path.join(workflow_out_dir, 'sub-%s_contrast-%s_ttest_map.nii.gz' % (subj_id, contrast_id))\n", - " fixed_fx_contrast_path_dict[contrast_id] = effect_size_path\n", - " fixed_fx_variance_path_dict[contrast_id] = variance_path\n", - " fixed_fx_ttest_path_dict[contrast_id] = ttest_path\n", - " \n", - " fixed_fx_contrast.to_filename(effect_size_path)\n", - " fixed_fx_variance.to_filename(variance_path)\n", - " fixed_fx_ttest.to_filename(ttest_path)\n", - " return fixed_fx_contrast_path_dict, fixed_fx_variance_path_dict, fixed_fx_ttest_path_dict" - ] - }, - { - "cell_type": "markdown", - "id": "b9210376", - "metadata": {}, - "source": [ - "### Create the fixed effect workflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f827e8c", - "metadata": {}, - "outputs": [], - "source": [ - "# initiate the fixed effect GLM workflow\n", - "@workflow.define(\n", - " outputs=['first_level_contrast',\n", - " 'fx_effect_size_list',\n", - " 'fx_effect_variance_list',\n", - " 'fx_t_test_list',\n", - " ]\n", - ")\n", - "def FixedEffectWorkflow(\n", - " subj_id,\n", - " run_id,\n", - " tr,\n", - " n_scans,\n", - " hrf_model,\n", - " smoothing_fwhm,\n", - "):\n", - "\n", - " # add task - get_subj_file\n", - " get_subjdata = workflow.add(\n", - " GetSubjData(\n", - " subj_id = subj_id, \n", - " )\n", - " )\n", - "\n", - " wf_firstlevel = workflow.add(\n", - " GLMWorkflow(\n", - " subj_id=get_subjdata.subj_id,\n", - " run_id=run_id,\n", - " tr=tr,\n", - " n_scans=n_scans,\n", - " hrf_model=hrf_model,\n", - " smoothing_fwhm=smoothing_fwhm,\n", - " subj_imgs=get_subjdata.subj_imgs,\n", - " subj_events=get_subjdata.subj_events,\n", - " subj_masks=get_subjdata.subj_masks,\n", - " )\n", - " )\n", - " \n", - " get_fixed_effcts = workflow.add(\n", - " GetFixedEffcts(\n", - " subj_id=get_subjdata.subj_id, \n", - " subj_masks=get_subjdata.subj_masks,\n", - " contrasts=wf_firstlevel.first_level_contrast, \n", - " effect_size_path_dict_list=wf_firstlevel.first_level_effect_size_list, \n", - " effect_variance_path_dict_list=wf_firstlevel.first_level_effect_variance_list\n", - " )\n", - " )\n", - "\n", - " # specify output\n", - " return (\n", - " wf_firstlevel.first_level_contrast,\n", - " get_fixed_effcts.fixed_fx_contrast_path_dict,\n", - " get_fixed_effcts.fixed_fx_variance_path_dict,\n", - " get_fixed_effcts.fixed_fx_ttest_path_dict,\n", - " )\n" - ] - }, - { - "cell_type": "markdown", - "id": "8cef3b93", - "metadata": {}, - "source": [ - "## Second-Level GLM\n", - "\n", - "The second level GLM, as known as the group level, averages results across subjects, containing the following steps:\n", - "- construct design matrix\n", - "- fit the second-level GLM\n", - "- statistical testing" - ] - }, - { - "cell_type": "markdown", - "id": "594069ce", - "metadata": {}, - "source": [ - "### Get the second level design matrix\n", - "\n", - "This is a one-group design. So we need a design matrix for a one-sample test.\n", - "\n", - "The design matrix is a single column of ones, corresponding to the model intercept." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "39b10152", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'design_matrix': ty.Any})\n", - "def GetSecondLevelDm(n_subj: int):\n", - " t1 = datetime.datetime.now()\n", - " print(\"\\nGet secondlevel design matrix ...\\n\")\n", - " design_matrix = pd.DataFrame([1] * n_subj,columns=['intercept'])\n", - " return design_matrix" - ] - }, - { - "cell_type": "markdown", - "id": "38be698e", - "metadata": {}, - "source": [ - "### Fit the second level GLM\n", - "\n", - "Here, we use the list of FirstLevel z-maps as the input for the SecondLevelModel." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "336cb11a", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'secondlevel_mask': ty.Any, 'stat_maps_dict': dict})\n", - "def SecondLevelEstimation(firstlevel_stats_list: list, design_matrix, firstlevel_contrast: list): \n", - " print(\"\\nStart secondlevel estimation ...\\n\")\n", - " stat_maps_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", - " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", - " print(' Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(firstlevel_contrast[0][0]), contrast_id))\n", - " second_level_input = [nib.load(stats_dict[contrast_id]) for stats_dict in firstlevel_stats_list]\n", - " second_level_model = SecondLevelModel()\n", - " second_level_model = second_level_model.fit(second_level_input, design_matrix=design_matrix)\n", - " secondlevel_mask = second_level_model.masker_.mask_img_\n", - " \n", - " stats = second_level_model.compute_contrast(output_type='all')\n", - " # write the resulting stat images to file\n", - " z_image_path = os.path.join(workflow_out_dir, 'secondlevel_contrast-%s_z_map.nii.gz' % contrast_id)\n", - " stat_maps_dict[contrast_id] = stats\n", - " stats['z_score'].to_filename(z_image_path)\n", - " plot_path = os.path.join(workflow_out_dir, 'secondlevel_unthresholded_contrast-%s_zmap.jpg' % contrast_id)\n", - " plot_glass_brain(stats['z_score'],\n", - " colorbar=True,\n", - " threshold=norm.isf(0.001),\n", - " title='Unthresholded z map',\n", - " output_file=plot_path)\n", - " return secondlevel_mask, stat_maps_dict" - ] - }, - { - "cell_type": "markdown", - "id": "c2c8e7d2", - "metadata": {}, - "source": [ - "### Create the second level GLM workflow" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e72933d", - "metadata": {}, - "outputs": [], - "source": [ - "# initiate the first-level GLM workflow\n", - "@workflow.define(\n", - " outputs=[\n", - " 'second_level_designmatrix',\n", - " 'second_level_mask',\n", - " 'second_level_stats_map',\n", - " ]\n", - ")\n", - "def SecondLevelWorkflow(\n", - " n_subj,\n", - " firstlevel_stats_list, \n", - " firstlevel_contrast,\n", - " n_perm,\n", - "):\n", - "\n", - " # add task - get_secondlevel_dm\n", - " get_secondlevel_dm = workflow.add(GetSecondLevelDm(n_subj=n_subj))\n", - "\n", - " # add task - secondlevel_estimation\n", - " secondlevel_estimation = workflow.add(\n", - " SecondLevelEstimation(\n", - " firstlevel_stats_list=firstlevel_stats_list, \n", - " design_matrix=get_secondlevel_dm.design_matrix, \n", - " firstlevel_contrast=firstlevel_contrast\n", - " )\n", - " )\n", - "\n", - " # specify output\n", - " return (\n", - " get_secondlevel_dm.design_matrix,\n", - " secondlevel_estimation.secondlevel_mask,\n", - " secondlevel_estimation.stat_maps_dict\n", - " )" - ] - }, - { - 
"cell_type": "markdown", - "id": "e3992216", - "metadata": {}, - "source": [ - "## Statistical Testing\n", - "\n", - "In this section, we present different ways of doing statistical testing\n", - "\n", - "1. Cluster-thresholding without multiple comparison\n", - "2. Multiple comparison using FDR\n", - "3. Parametric testing\n", - "4. NonParametric testing" - ] - }, - { - "cell_type": "markdown", - "id": "81eecd78", - "metadata": {}, - "source": [ - "### Cluster-thresholding and Plot without multiple comparison\n", - "\n", - "Threshold the resulting map without multiple comparisons correction, abs(z) > 3.29 (equivalent to p < 0.001), cluster size > 10 voxels." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b566f1d3", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", - "def ClusterThresholding(stat_maps_dict: dict, threshold: float, cluster_threshold: int):\n", - " t1 = datetime.datetime.now()\n", - " print(\"\\nStart cluster thresholding ...\\n\")\n", - " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", - " print('Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(stat_maps_dict), stats_id))\n", - " thresholded_map = threshold_img(\n", - " img = stats_val['z_score'],\n", - " threshold=threshold,\n", - " cluster_threshold=cluster_threshold,\n", - " two_sided=True,\n", - " )\n", - " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_cluster_thresholded_contrast-%s_z_map.nii.gz' % stats_id)\n", - " thresholded_map_dict[stats_id] = thresholded_map_path\n", - " thresholded_map.to_filename(thresholded_map_path)\n", - " plot_path = os.path.join(workflow_out_dir, \n", - " 'secondlevel_cluster_thresholded_contrast-%s_zmap.jpg' % stats_id)\n", - " plot_contrast_dict[stats_id] = plot_path\n", - " plot_stat_map(thresholded_map,\n", - " title='Cluster Thresholded z map',\n", - " output_file=plot_path)\n", - " print(\"\\nCluster thresholding is done\")\n", - " return thresholded_map_dict, plot_contrast_dict" - ] - }, - { - "cell_type": "markdown", - "id": "7c830b46", - "metadata": {}, - "source": [ - "### Multiple comparison and Plot\n", - "\n", - "We have the following choices:\n", - "- `fdr`: False Discovery Rate (FDR <.05) and no cluster-level threshold\n", - "- `fpr`: False Positive Rate\n", - "- `bonferroni`\n", - "\n", - "More details see [here](https://nilearn.github.io/stable/modules/generated/nilearn.glm.threshold_stats_img.html#nilearn.glm.threshold_stats_img)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce280b52", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", - "def MultipleComparison(stat_maps_dict: dict, alpha: float, height_control: str):\n", - " print(\"\\nStart multiple comparison ...\\n\")\n", - " from nilearn.glm import threshold_stats_img\n", - " from nilearn.plotting import plot_stat_map\n", - " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", - " print('Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(stat_maps_dict), stats_id))\n", - " thresholded_map, threshold = threshold_stats_img(\n", - " 
stat_img=stats_val['z_score'], \n", - " alpha=alpha, \n", - " height_control=height_control)\n", - " thresholded_map_path = os.path.join(workflow_out_dir, \n", - " 'secondlevel_multiple_comp_corrected_contrast-%s_z_map.nii.gz' % stats_id)\n", - " thresholded_map_dict[stats_id] = thresholded_map_path\n", - " thresholded_map.to_filename(thresholded_map_path)\n", - " plot_path = os.path.join(workflow_out_dir, \n", - " 'secondlevel_multiple_comp_corrected_contrast-%s_zmap.jpg' % stats_id)\n", - " plot_contrast_dict[stats_id] = plot_path\n", - " plot_stat_map(thresholded_map,\n", - " title='Thresholded z map, expected fdr = .05',\n", - " threshold=threshold, \n", - " output_file=plot_path)\n", - " print(\"\\nMultiple comparison is done\")\n", - " return thresholded_map_dict, plot_contrast_dict" - ] - }, - { - "cell_type": "markdown", - "id": "f75d4d17", - "metadata": {}, - "source": [ - "### Parametric test & Plot\n", - "\n", - "We threshold the second level contrast at uncorrected p < 0.001.\n", - "\n", - "A nilearn example see [here](https://nilearn.github.io/dev/auto_examples/05_glm_second_level/plot_second_level_one_sample_test.html)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6bd90cb2", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", - "def ParametricTest(stat_maps_dict: dict, secondlevel_mask: ty.Any):\n", - " print(\"\\nStart parametric test ...\\n\")\n", - " thresholded_map_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " plot_contrast_dict = dict.fromkeys(stat_maps_dict.keys())\n", - " for index, (stats_id, stats_val) in enumerate(stat_maps_dict.items()):\n", - " print('Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(stat_maps_dict), stats_id))\n", - " p_val = stats_val['p_value']\n", - " n_voxels = np.sum(get_data(img=secondlevel_mask))\n", - " # Correcting the p-values for multiple testing and taking negative logarithm\n", - " neg_log_pval = math_img(\"-np.log10(np.minimum(1, img * {}))\"\n", - " .format(str(n_voxels)),\n", - " img=p_val)\n", - " \n", - " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_Parametric_thresholded_contrast-%s_z_map.nii.gz' % stats_id)\n", - " thresholded_map_dict[stats_id] = thresholded_map_path\n", - " neg_log_pval.to_filename(thresholded_map_path)\n", - " \n", - " # Since we are plotting negative log p-values and using a threshold equal to 1,\n", - " # it corresponds to corrected p-values lower than 10%, meaning that there is\n", - " # less than 10% probability to make a single false discovery (90% chance that\n", - " # we make no false discovery at all). This threshold is much more conservative\n", - " # than the previous one.\n", - " title = ('parametric test (FWER < 10%)')\n", - " plot_path = os.path.join(workflow_out_dir, \n", - " 'secondlevel_Parametric_thresholded_contrast-%s_zmap.jpg' % stats_id)\n", - " plot_contrast_dict[stats_id] = plot_path\n", - " plot_stat_map(\n", - " neg_log_pval, colorbar=True,\n", - " title=title, output_file=plot_path)\n", - " print(\"\\nParametric test is done\")\n", - " return thresholded_map_dict, plot_contrast_dict" - ] - }, - { - "cell_type": "markdown", - "id": "37358f38", - "metadata": {}, - "source": [ - "### Non-Parametric test & Plot\n", - "\n", - "Here we compute the (corrected) negative log p-values with permutation test." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5482dcd1", - "metadata": {}, - "outputs": [], - "source": [ - "@python.define(outputs={'thresholded_map_dict': dict, 'plot_contrast_dict': dict})\n", - "def NonparametricTest(\n", - " firstlevel_stats_list: list,\n", - " smoothing_fwhm: float,\n", - " design_matrix: ty.Any,\n", - " firstlevel_contrast: list,\n", - " n_perm: int\n", - "):\n", - " print(\"\\nStart nonparametric test ...\\n\")\n", - " thresholded_map_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", - " plot_contrast_dict = dict.fromkeys(firstlevel_contrast[0][0].keys())\n", - " for index, (contrast_id, contrast_val) in enumerate(firstlevel_contrast[0][0].items()):\n", - " print(' Contrast % 2i out of %i: %s' % (\n", - " index + 1, len(firstlevel_contrast[0][0]), contrast_id))\n", - " # here we set threshold as none to do voxel-level FWER-correction.\n", - " second_level_input = [nib.load(stats_dict[contrast_id]) for stats_dict in firstlevel_stats_list]\n", - " neg_log_pvals_permuted_ols_unmasked = \\\n", - " non_parametric_inference(second_level_input=second_level_input, design_matrix=design_matrix,\n", - " model_intercept=True, n_perm=n_perm,\n", - " two_sided_test=False, smoothing_fwhm=smoothing_fwhm, n_jobs=1)\n", - " thresholded_map_path = os.path.join(workflow_out_dir, 'secondlevel_permutation_contrast-%s_z_map.nii.gz' % contrast_id)\n", - " thresholded_map_dict[contrast_id] = thresholded_map_path\n", - " neg_log_pvals_permuted_ols_unmasked.to_filename(thresholded_map_path)\n", - " # here I actually have more than one contrast\n", - " title = ('permutation test (FWER < 10%)')\n", - " plot_path = os.path.join(workflow_out_dir, 'secondlevel_permutation_contrast-%s_zmap.jpg' % contrast_id)\n", - " plot_contrast_dict[contrast_id] = plot_path\n", - " plot_stat_map(\n", - " neg_log_pvals_permuted_ols_unmasked, colorbar=True, \n", - " title=title, output_file=plot_path)\n", - " print(\"\\nPermutation is done\")\n", - " return thresholded_map_dict, plot_contrast_dict" - ] - }, - { - "cell_type": "markdown", - "id": "54c2201a", - "metadata": {}, - "source": [ - "## The Ultimate Workflow\n", - "\n", - "Now, let's connect all tasks and workflows together.\n", - "\n", - "Here we randomly choose **5** subjects to perform the analysis. \n", - "\n", - "For computational time, we set `n_perm=100`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e40304a3", - "metadata": {}, - "outputs": [], - "source": [ - "@workflow.define(\n", - " outputs={\n", - " 'cluster_thresholded_map_dict': dict,\n", - " 'cluster_plot_contrast_dict': dict,\n", - " 'mult_comp_thresholded_map_dict': dict,\n", - " 'mult_comp_plot_contrast_dict': dict,\n", - " 'parametric_thresholded_map_dict': dict,\n", - " 'parametric_plot_contrast_dict': dict,\n", - " 'nonparametric_thresholded_map_dict': dict,\n", - " 'nonparametric_plot_contrast_dict': dict,\n", - " }\n", - ")\n", - "def TwoLevelGLM(\n", - " n_subj: int,\n", - " run_id: list[int] = [1,2],\n", - " tr: float = 2.3,\n", - " n_scans: int = 300,\n", - " hrf_model: str = 'glover',\n", - " smoothing_fwhm: float = 5.0,\n", - "):\n", - "\n", - " # randomly choose subjects\n", - " fixed_effect = workflow.add(\n", - " FixedEffectWorkflow(\n", - " run_id=run_id,\n", - " tr=tr,\n", - " n_scans=n_scans,\n", - " hrf_model=hrf_model,\n", - " smoothing_fwhm=smoothing_fwhm,\n", - " )\n", - " .split(subj_id=random.sample(range(1,17), n_subj))\n", - " .combine('subj_id')\n", - " )\n", - "\n", - " secondlevel = workflow.add(\n", - " SecondLevelWorkflow(\n", - " n_subj = n_subj,\n", - " firstlevel_stats_list=fixed_effect.fx_t_test_list,\n", - " firstlevel_contrast=fixed_effect.first_level_contrast,\n", - " )\n", - " )\n", - " \n", - " \n", - " # add task - cluster_thresholding\n", - " cluster_thresholding = workflow.add(\n", - " ClusterThresholding(\n", - " stat_maps_dict=secondlevel.second_level_stats_map, \n", - " threshold=3.29, \n", - " cluster_threshold=10\n", - " )\n", - " )\n", - "\n", - "\n", - " # add task - multiple_comparison\n", - " multiple_comparison = workflow.add(\n", - " MultipleComparison(\n", - " stat_maps_dict=secondlevel.second_level_stats_map, \n", - " alpha=0.05,\n", - " height_control='fdr'\n", - " )\n", - " )\n", - "\n", - " # add task - parametric_test\n", - " parametric_test = workflow.add(\n", - " ParametricTest(\n", - " stat_maps_dict=secondlevel.second_level_stats_map, \n", - " secondlevel_mask=secondlevel.second_level_mask\n", - " )\n", - " \n", - " )\n", - "\n", - " # add task - nonparametric_test\n", - " nonparametric_test = workflow.add(\n", - " NonparametricTest(\n", - " firstlevel_stats_list=fixed_effect.fx_t_test_list,\n", - " smoothing_fwhm=5.0,\n", - " design_matrix=secondlevel.second_level_designmatrix,\n", - " firstlevel_contrast=fixed_effect.first_level_contrast,\n", - " n_perm=100,\n", - " )\n", - " )\n", - "\n", - " return (\n", - " secondlevel.second_level_stats_map,\n", - " cluster_thresholding.thresholded_map_dict,\n", - " cluster_thresholding.plot_contrast_dict,\n", - " multiple_comparison.thresholded_map_dict,\n", - " multiple_comparison.plot_contrast_dict,\n", - " parametric_test.thresholded_map_dict,\n", - " parametric_test.plot_contrast_dict,\n", - " nonparametric_test.thresholded_map_dict,\n", - " nonparametric_test.plot_contrast_dict\n", - " )\n" - ] - }, - { - "cell_type": "markdown", - "id": "27ce8b99", - "metadata": {}, - "source": [ - "### Run Workflow Run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "487fc005", - "metadata": { - "tags": [ - "hide-output" - ] - }, - "outputs": [], - "source": [ - "from pydra import Submitter\n", - "\n", - "wf = TwoLevelGLM(n_subj=2)\n", - "\n", - "with Submitter(plugin='cf', n_procs=1) as submitter:\n", - " submitter(wf)\n", - "\n", - "results = wf.result()\n", - "\n", - "print(results)" - ] - }, - { - "cell_type": "markdown", - "id": 
"57e9a1a1", - "metadata": {}, - "source": [ - "## Let's Plot!\n", - "\n", - "We only use 5 subjects, so it's reasonable the following plots have nothing survived from testing." - ] - }, - { - "cell_type": "markdown", - "id": "a9a671d3", - "metadata": {}, - "source": [ - "### Unthresholded\n", - "\n", - "Let's plot the unthresholded image first." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "01f314c3", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "from IPython.display import Image\n", - "ut_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_unthresholded*.jpg\"))\n", - "Image(filename=ut_list[0])" - ] - }, - { - "cell_type": "markdown", - "id": "e87582eb", - "metadata": {}, - "source": [ - "### Cluster Thresholding" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92797899", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "from IPython.display import Image\n", - "ct_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_cluster_thresholded*.jpg\"))\n", - "Image(filename=ct_list[0])" - ] - }, - { - "cell_type": "markdown", - "id": "a6b6f0ac", - "metadata": {}, - "source": [ - "### Multiple Comparison" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "270bf2b9", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "mc_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_multiple_comp*.jpg\"))\n", - "Image(filename=mc_list[0])" - ] - }, - { - "cell_type": "markdown", - "id": "1a106690", - "metadata": {}, - "source": [ - "### Parametric Test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34d1d04f", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "pt_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_Parametric*.jpg\"))\n", - "Image(filename=pt_list[0])" - ] - }, - { - "cell_type": "markdown", - "id": "b4710f5d", - "metadata": {}, - "source": [ - "### NonParametric Test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1f354cc", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "npt_list = glob.glob(os.path.join(workflow_out_dir, \"secondlevel_permutation*.jpg\"))\n", - "Image(filename=npt_list[0])" - ] - }, - { - "cell_type": "markdown", - "id": "47100abc", - "metadata": {}, - "source": [ - "## Exercise #1" - ] - }, - { - "cell_type": "markdown", - "id": "900179cf", - "metadata": {}, - "source": [ - "In this example, we conducted GLM on each run per subject separately and then used a fixed-effect model to average across runs. \n", - "\n", - "Where did we put `.splitter` and `.combiner`. Why did we put it there?" - ] - }, - { - "cell_type": "markdown", - "id": "20c26eac", - "metadata": {}, - "source": [ - "## Exercise #2" - ] - }, - { - "cell_type": "markdown", - "id": "a3193098", - "metadata": {}, - "source": [ - "Moreover, We choose this approach due to limited memory on GitHub. [FirstLevelModel](https://nilearn.github.io/stable/modules/generated/nilearn.glm.first_level.FirstLevelModel.html) in Nilearn also allows to compute multiple runs with a fixed-effect model simultaneously. Here is an [example](https://nilearn.github.io/stable/auto_examples/04_glm_first_level/plot_fiac_analysis.html#sphx-glr-auto-examples-04-glm-first-level-plot-fiac-analysis-py). \n", - "\n", - "Would you like to give it a try on your own?" 
- ] - } - ], - "metadata": { - "jupytext": { - "formats": "ipynb,md:myst" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/new-docs/source/index.rst b/new-docs/source/index.rst index a1b3417ea9..65ab02e132 100644 --- a/new-docs/source/index.rst +++ b/new-docs/source/index.rst @@ -93,8 +93,7 @@ The following comprehensive examples demonstrate how to use Pydra to build and e complex workflows * :ref:`T1w MRI preprocessing` -* :ref:`One-level GLM` -* :ref:`Two-Level GLM` +* :ref:`General Linear Model (GLM)` How-to Guides ------------- @@ -140,8 +139,7 @@ See the full reference documentation for Pydra :hidden: examples/t1w-preprocess - examples/first-level-glm - examples/two-level-glm + examples/glm .. toctree:: :maxdepth: 2 diff --git a/pyproject.toml b/pyproject.toml index 81be96dc6e..4ba263cbe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ doc = [ "nilearn", "numpy", "numpydoc>=0.6.0", + "openneuro", "packaging", "pandas", "pandoc", @@ -97,6 +98,7 @@ tutorial = [ "nibabel", "nilearn", "numpy", + "openneuro", "pandas", "psutil", "pydra-mrtrix3 >=3.0.4a17", From dcecbf3e92a6e04a6ac1a7b43da0873f2ab4e5bd Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:02:42 +1100 Subject: [PATCH 234/342] removed .python-version file --- .gitignore | 1 + new-docs/source/examples/.python-version | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 new-docs/source/examples/.python-version diff --git a/.gitignore b/.gitignore index 293fc4f8a8..4fba2fa8de 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ dist __pycache__ *.pyc +.python-version .ipynb_checkpoints .vscode/ diff --git a/new-docs/source/examples/.python-version b/new-docs/source/examples/.python-version deleted file mode 100644 index 2da62ef804..0000000000 --- a/new-docs/source/examples/.python-version +++ /dev/null @@ -1 +0,0 @@ -wf12 From c422033fdfce102a767a9c8e78145a56a6341475 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:24:40 +1100 Subject: [PATCH 235/342] allow graceful failure of workflows when not debugging --- new-docs/source/examples/glm.ipynb | 97 ++++++------------------------ pydra/engine/submitter.py | 45 +++++++++++--- pyproject.toml | 1 + 3 files changed, 55 insertions(+), 88 deletions(-) diff --git a/new-docs/source/examples/glm.ipynb b/new-docs/source/examples/glm.ipynb index e82b384686..3e5e7c26b7 100644 --- a/new-docs/source/examples/glm.ipynb +++ b/new-docs/source/examples/glm.ipynb @@ -56,6 +56,7 @@ "from pathlib import Path\n", "\n", "from pydra.design import python, workflow\n", + "from pydra.engine.submitter import Submitter\n", "from fileformats.generic import File, Directory\n", "from fileformats.text import Csv\n", "import pandas as pd\n", @@ -588,8 +589,10 @@ "source": [ "wf = FullWorkflow(output_dir=workflow_out_dir, n_subjects=1, contrast='StopSuccess - Go')\n", "\n", - "if False:\n", - " results = wf(plugin='cf', n_procs=4)\n", + "if __name__ == \"__main__\":\n", + " with Submitter(worker='cf', n_procs=4) as sub:\n", + " results = sub(wf)\n", + "\n", " print(results)" ] }, @@ -630,7 +633,7 @@ }, "outputs": [], "source": [ - "!ls ../outputs/6_glm" + "! 
ls ../outputs/6_glm" ] }, { @@ -641,14 +644,6 @@ "### Plot figures" ] }, - { - "cell_type": "markdown", - "id": "dad22ca7", - "metadata": {}, - "source": [ - "#### First level contrast" - ] - }, { "cell_type": "code", "execution_count": null, @@ -662,73 +657,19 @@ "source": [ "from IPython.display import Image\n", "\n", - "Image(filename='../outputs/6_glm/firstlevel_contrast.jpg')" - ] - }, - { - "cell_type": "markdown", - "id": "0cdfcc29", - "metadata": {}, - "source": [ - "#### Nilearn Z map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f08aa59f", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "Image(filename='../outputs/6_glm/nilearn_z_map.jpg')" - ] - }, - { - "cell_type": "markdown", - "id": "ca1b896f", - "metadata": {}, - "source": [ - "#### FSL Z map" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7d18b6ed", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "Image(filename='../outputs/6_glm/fsl_z_map.jpg')" - ] - }, - { - "cell_type": "markdown", - "id": "fc68e7dc", - "metadata": {}, - "source": [ - "#### Nilearn FSL comparison" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a37679ff", - "metadata": { - "tags": [ - "hide-input" - ] - }, - "outputs": [], - "source": [ - "Image(filename='../outputs/6_glm/nilearn_fsl_comp.jpg')" + "\n", + "if not results.errored:\n", + " # First-level contrast\n", + " Image(filename='../outputs/6_glm/firstlevel_contrast.jpg')\n", + "\n", + " # Nilearn Z map\n", + " Image(filename='../outputs/6_glm/nilearn_z_map.jpg')\n", + "\n", + " # FSL Z map\n", + " Image(filename='../outputs/6_glm/fsl_z_map.jpg')\n", + "\n", + " # Nilearn and FSL comparison\n", + " Image(filename='../outputs/6_glm/nilearn_fsl_comp.jpg')" ] }, { diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 07ba42b063..b3a4689136 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -29,11 +29,12 @@ if ty.TYPE_CHECKING: from .node import Node - from .specs import TaskDef, WorkflowDef, TaskHooks + from .specs import TaskDef, TaskOutputs, WorkflowDef, TaskHooks, Result from .environments import Environment from .state import State DefType = ty.TypeVar("DefType", bound="TaskDef") +OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") # Used to flag development mode of Audit develop = False @@ -167,14 +168,34 @@ def worker(self): def __call__( self, - task_def: "TaskDef", - name: str | None = "task", + task_def: "TaskDef[OutputType]", hooks: "TaskHooks | None" = None, - ): - """Submitter run function.""" + raise_errors: bool | None = None, + ) -> "Result[OutputType]": + """Submitter run function. 
- if name is None: - name = "task" + Parameters + ---------- + task_def : :obj:`~pydra.engine.specs.TaskDef` + The task definition to run + hooks : :obj:`~pydra.engine.specs.TaskHooks`, optional + Task hooks, callable functions called as the task is setup and torn down, + by default no functions are called at the hooks + raise_errors : bool, optional + Whether to raise errors, by default True if the 'debug' worker is used, + otherwise False + + Returns + ------- + result : Any + The result of the task + """ + if raise_errors is None: + raise_errors = self.worker_name == "debug" + if not isinstance(raise_errors, bool): + raise TypeError( + f"'raise_errors' must be a boolean or None, not {type(raise_errors)}" + ) task_def._check_rules() # If the outer task is split, create an implicit workflow to hold the split nodes @@ -198,7 +219,7 @@ def Split(defn: TaskDef, output_types: dict): task = Task( task_def, submitter=self, - name=name, + name="task", environment=self.environment, hooks=hooks, ) @@ -211,11 +232,15 @@ def Split(defn: TaskDef, output_types: dict): else: self.worker.run(task, rerun=self.rerun) except Exception as e: - e.add_note( + msg = ( f"Full crash report for {type(task_def).__name__!r} task is here: " + str(task.output_dir / "_error.pklz") ) - raise e + if raise_errors: + e.add_note(msg) + raise e + else: + logger.error("\nTask execution failed\n" + msg) finally: self.run_start_time = None PersistentCache().clean_up() diff --git a/pyproject.toml b/pyproject.toml index 4ba263cbe6..34d92c3f4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ doc = [ "ipython", "ipykernel", "ipywidgets", + "matplotlib", "nbsphinx", "nest_asyncio", "nibabel", From 0b91b95c090e0931b2a29a6d44c69c5b049270cb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:28:36 +1100 Subject: [PATCH 236/342] fixed package name of openneuro-py --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 34d92c3f4f..a7b7de2e35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ doc = [ "nilearn", "numpy", "numpydoc>=0.6.0", - "openneuro", + "openneuro-py", "packaging", "pandas", "pandoc", @@ -99,7 +99,7 @@ tutorial = [ "nibabel", "nilearn", "numpy", - "openneuro", + "openneuro-py", "pandas", "psutil", "pydra-mrtrix3 >=3.0.4a17", From 6358b81c8e955f0040c2d820f85b65b07f0f65ce Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:36:36 +1100 Subject: [PATCH 237/342] removed name attribute from submitter call --- pydra/engine/specs.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 27969fd587..f5dcec528c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -186,7 +186,6 @@ def __call__( audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, messenger_args: dict[str, ty.Any] | None = None, - name: str | None = None, hooks: TaskHooks | None = None, **kwargs: ty.Any, ) -> OutputsType: @@ -212,8 +211,6 @@ def __call__( Messengers, by default None messenger_args : dict, optional Messenger arguments, by default None - name : str - The name of the task, by default None **kwargs : dict Keyword arguments to pass on to the worker initialisation @@ -242,7 +239,6 @@ def __call__( ) as sub: result = sub( self, - name=name, hooks=hooks, ) except TypeError as e: From f7934eb5759b8fa65ba79eb8432f9bb7348c804f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 15:37:24 +1100 Subject: [PATCH 
238/342] renamed default task name that is submitted to "main" (from "task") --- pydra/engine/submitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index b3a4689136..02c51f4216 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -219,7 +219,7 @@ def Split(defn: TaskDef, output_types: dict): task = Task( task_def, submitter=self, - name="task", + name="main", environment=self.environment, hooks=hooks, ) From 84a8d3a817b87943f2062978eae7ba7ecf925ea8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 16:23:43 +1100 Subject: [PATCH 239/342] updated name of GLM example notebook --- new-docs/source/examples/glm.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/new-docs/source/examples/glm.ipynb b/new-docs/source/examples/glm.ipynb index 3e5e7c26b7..73813010de 100644 --- a/new-docs/source/examples/glm.ipynb +++ b/new-docs/source/examples/glm.ipynb @@ -5,7 +5,7 @@ "id": "c8149a94", "metadata": {}, "source": [ - "# One-Level GLM" + "# General Linear Model (GLM)" ] }, { From 099b0f4c324b4abfdd4be314b25eaa4d021ef9a8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 16:53:40 +1100 Subject: [PATCH 240/342] fixing up errors in test_shell --- pydra/design/shell.py | 16 ++++++++++++++-- pydra/design/tests/test_shell.py | 26 ++++++++++++++++++-------- pydra/engine/specs.py | 4 ++-- pydra/utils/typing.py | 14 ++++++++------ 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 6f6cde3441..01ab0be45e 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -22,7 +22,12 @@ make_task_def, NO_DEFAULT, ) -from pydra.utils.typing import is_fileset_or_union, MultiInputObj +from pydra.utils.typing import ( + is_fileset_or_union, + MultiInputObj, + is_optional, + optional_type, +) if ty.TYPE_CHECKING: from pydra.engine.specs import ShellDef @@ -94,11 +99,15 @@ class arg(Arg): argstr: str | None = "" position: int | None = None - sep: str | None = attrs.field(default=None) + sep: str | None = attrs.field() allowed_values: list | None = None container_path: bool = False # IS THIS STILL USED?? 
formatter: ty.Callable | None = None + @sep.default + def _sep_default(self): + return " " if self.type is tuple or ty.get_origin(self.type) is tuple else None + @sep.validator def _validate_sep(self, _, sep): if self.type is ty.Any: @@ -107,7 +116,10 @@ def _validate_sep(self, _, sep): tp = ty.get_args(self.type)[0] else: tp = self.type + if is_optional(tp): + tp = optional_type(tp) origin = ty.get_origin(tp) or tp + if ( inspect.isclass(origin) and issubclass(origin, ty.Sequence) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 0fc5ff0170..f4fcd64238 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -162,7 +162,7 @@ def test_interface_template_more_complex(): Cp = shell.define( ( - "cp " + "cp " "-R " "--text-arg " "--int-arg " @@ -187,7 +187,9 @@ def test_interface_template_more_complex(): help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", + type=MultiInputObj[FsObject], + position=1, ), output, shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), @@ -210,6 +212,7 @@ def test_interface_template_more_complex(): argstr="--tuple-arg", type=tuple[int, str] | None, default=None, + sep=" ", position=6, ), ShellDef.additional_args, @@ -245,7 +248,7 @@ def test_interface_template_with_overrides_and_optionals(): Cp = shell.define( ( - "cp " + "cp " "-R " "--text-arg " "--int-arg " @@ -284,7 +287,9 @@ def test_interface_template_with_overrides_and_optionals(): help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", + type=MultiInputObj[FsObject], + position=1, ), shell.arg( name="recursive", @@ -306,6 +311,7 @@ def test_interface_template_with_overrides_and_optionals(): name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str], + sep=" ", position=5, ), ] + outargs + [ShellDef.additional_args] @@ -332,7 +338,7 @@ def test_interface_template_with_defaults(): Cp = shell.define( ( - "cp " + "cp " "-R " "--text-arg " "--int-arg " @@ -357,7 +363,9 @@ def test_interface_template_with_defaults(): help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", + type=MultiInputObj[FsObject], + position=1, ), output, shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), @@ -400,7 +408,7 @@ def test_interface_template_with_type_overrides(): Cp = shell.define( ( - "cp " + "cp " "-R " "--text-arg " "--int-arg " @@ -426,7 +434,9 @@ def test_interface_template_with_type_overrides(): help=shell.EXECUTABLE_HELP_STRING, ), shell.arg( - name="in_fs_objects", type=MultiInputObj[FsObject], position=1, sep=" " + name="in_fs_objects", + type=MultiInputObj[FsObject], + position=1, ), output, shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index f5dcec528c..3b81cc54fa 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -18,7 +18,7 @@ import cloudpickle as cp from fileformats.generic import FileSet from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils.typing import TypeParser, is_optional, non_optional_type +from pydra.utils.typing import TypeParser, is_optional, optional_type from .helpers import ( attrs_fields, attrs_values, @@ -1130,7 +1130,7 @@ def _command_pos_args( cmd_add = [] # formatter that 
creates a custom command argument # it can take the value of the field, all inputs, or the value of other fields. - tp = non_optional_type(field.type) if is_optional(field.type) else field.type + tp = optional_type(field.type) if is_optional(field.type) else field.type if field.formatter: call_args = inspect.getfullargspec(field.formatter) call_args_val = {} diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index d2420785a6..cb4e46311c 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1037,22 +1037,24 @@ def label_str(self): def is_union(type_: type) -> bool: + """Checks whether a type is a Union, in either ty.Union[T, U] or T | U form""" return ty.get_origin(type_) in UNION_TYPES def is_optional(type_: type) -> bool: - """Check if the type is Optional""" + """Check if the type is Optional, i.e. a union containing None""" if is_union(type_): return any(a is type(None) or is_optional(a) for a in ty.get_args(type_)) return False -def non_optional_type(type_: type) -> type: +def optional_type(type_: type) -> type: + """Gets the non-None args of an optional type (i.e. a union with a None arg)""" if is_optional(type_): - non_optional = [a for a in ty.get_args(type_) if a is not type(None)] - if len(non_optional) == 1: - return non_optional[0] - return ty.Union[tuple(non_optional)] + non_none = [a for a in ty.get_args(type_) if a is not type(None)] + if len(non_none) == 1: + return non_none[0] + return ty.Union[tuple(non_none)] return type_ From 307b52b346cb962708d65dde3c5620a3cc8a39be Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 16:33:06 +1100 Subject: [PATCH 241/342] added comment --- pydra/engine/tests/test_environments.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index fd36afaf8a..98351796fb 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -34,6 +34,7 @@ def newcache(x): name="shelly", ) + # Up to here env_outputs = Native().execute(shelly_task) outputs = shelly(cache_dir=newcache("shelly-exec")) assert env_outputs == attrs_values(outputs) From f74ddae80e4e80653a117729f3fd5dcfca06afbb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 17 Feb 2025 20:59:36 +1100 Subject: [PATCH 242/342] starting to convert test_environments tests to new syntax --- pydra/engine/tests/test_environments.py | 57 +++++++++++++------------ 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 98351796fb..65dcbb4a00 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,5 +1,5 @@ from pathlib import Path - +import typing as ty from ..environments import Native, Docker, Singularity from ..task import ShellDef from ..submitter import Submitter @@ -17,6 +17,10 @@ def makedir(path, name): return newdir +def drop_stderr(dct: dict[str, ty.Any]): + return {k: v for k, v in dct.items() if k != "stderror"} + + def test_native_1(tmp_path): """simple command, no arguments""" @@ -24,27 +28,26 @@ def newcache(x): return makedir(tmp_path, x) cmd = "whoami" - ShellDef = shell.define(cmd) - - shelly = ShellDef() + Shelly = shell.define(cmd) + shelly = Shelly() assert shelly.cmdline == cmd - shelly_task = Task( + + shelly_job = Task( definition=shelly, submitter=Submitter(cache_dir=newcache("shelly-task")), name="shelly", ) + env_outputs = Native().execute(shelly_job) - # Up to here - env_outputs = 
Native().execute(shelly_task) outputs = shelly(cache_dir=newcache("shelly-exec")) - assert env_outputs == attrs_values(outputs) + assert drop_stderr(env_outputs) == drop_stderr(attrs_values(outputs)) outputs = shelly(environment=Native()) assert env_outputs == attrs_values(outputs) with Submitter(cache_dir=newcache("shelly-submitter"), environment=Native()) as sub: result = sub(shelly) - assert env_outputs == attrs_values(result.outputs) + assert drop_stderr(env_outputs) == drop_stderr(attrs_values(result.outputs)) @no_win @@ -57,14 +60,16 @@ def newcache(x): cmd = ["whoami"] docker = Docker(image="busybox") - shell_def = shell.define(cmd) - shelly = Task( - definition=shell_def, + Shelly = shell.define(cmd) + shelly = Shelly() + assert shelly.cmdline == " ".join(cmd) + + shelly_job = Task( + definition=shelly, submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == " ".join(cmd) - env_res = docker.execute(shelly) + env_res = docker.execute(shelly_job) shelly_env = ShellDef( name="shelly", @@ -100,24 +105,20 @@ def newcache(x): cmd = "whoami" docker = Docker(image="busybox") - shell_def = shell.define(cmd)() - shelly = Task( - definition=shell_def, + shelly = shell.define(cmd)() + shelly_job = Task( + definition=shelly, submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == cmd - env_res = docker.execute(shelly) + assert shelly.cmdline == cmd + env_res = docker.execute(shelly_job) - shelly_env = ShellDef( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=docker, - ) - with Submitter(worker="cf") as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + with Submitter( + worker="cf", cache_dir=newcache("shelly_env"), environment=docker + ) as sub: + result = sub(shelly) + assert env_res == attrs_values(result.outputs) shelly_call = ShellDef( name="shelly", executable=cmd, cache_dir=newcache("shelly_call") From 67eadff348b256eb7d3af830a0417ffe2b2e34ef Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 21 Feb 2025 16:19:52 +1100 Subject: [PATCH 243/342] debugging test_environments --- pydra/engine/specs.py | 8 + pydra/engine/tests/test_environments.py | 434 +++++++++++------------- 2 files changed, 211 insertions(+), 231 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 3b81cc54fa..cd82ab96d5 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -654,6 +654,14 @@ def errors(self): return cp.load(f) return None + @property + def task(self): + task_pkl = self.output_dir / "_task.pklz" + if not task_pkl.exists(): + return None + with open(task_pkl, "rb") as f: + return cp.load(f) + @attrs.define(kw_only=True) class RuntimeSpec: diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 65dcbb4a00..fefb9b1c83 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,11 +1,12 @@ from pathlib import Path import typing as ty +import attrs from ..environments import Native, Docker, Singularity -from ..task import ShellDef from ..submitter import Submitter from fileformats.generic import File from pydra.design import shell from pydra.engine.core import Task +from pydra.engine.task import ShellDef from pydra.engine.helpers import attrs_values from .utils import no_win, need_docker, need_singularity import pytest @@ -69,22 +70,14 @@ def newcache(x): submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - 
env_res = docker.execute(shelly_job) + outputs_dict = docker.execute(shelly_job) - shelly_env = ShellDef( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=docker, - ) - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + with Submitter(cache_dir=newcache("shelly_sub"), environment=docker) as sub: + result = sub(shelly) + assert attrs_values(result.outputs) == outputs_dict - shelly_call = ShellDef( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") - ) - shelly_call(environment=docker) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + assert outputs_dict == attrs_values(outputs) @no_win @@ -112,20 +105,16 @@ def newcache(x): name="shelly", ) assert shelly.cmdline == cmd - env_res = docker.execute(shelly_job) + outputs_dict = docker.execute(shelly_job) with Submitter( - worker="cf", cache_dir=newcache("shelly_env"), environment=docker + worker="cf", cache_dir=newcache("shelly_sub"), environment=docker ) as sub: result = sub(shelly) - assert env_res == attrs_values(result.outputs) + assert outputs_dict == attrs_values(result.outputs) - shelly_call = ShellDef( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") - ) - with Submitter(worker="cf") as sub: - shelly_call(submitter=sub, environment=docker) - assert env_res == shelly_call.result().output.__dict__ + outputs = shelly(cache_dir=newcache("shelly_call"), environment=docker) + assert outputs_dict == attrs_values(outputs) @no_win @@ -145,22 +134,14 @@ def newcache(x): name="shelly", ) assert shell_def.cmdline == " ".join(cmd) - env_res = sing.execute(shelly) + outputs_dict = sing.execute(shelly) - shelly_env = ShellDef( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=sing, - ) - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + with Submitter(cache_dir=newcache("shelly_sub"), environment=sing) as sub: + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = ShellDef( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") - ) - shelly_call(environment=sing) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) + assert outputs_dict == attrs_values(outputs) @no_win @@ -180,78 +161,74 @@ def newcache(x): name="shelly", ) assert shell_def.cmdline == " ".join(cmd) - env_res = sing.execute(shelly) + outputs_dict = sing.execute(shelly) - shelly_env = ShellDef( - name="shelly", - executable=cmd, - cache_dir=newcache("shelly_env"), - environment=sing, - ) with Submitter(worker=plugin) as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = ShellDef( - name="shelly", executable=cmd, cache_dir=newcache("shelly_call") - ) - with Submitter(worker=plugin) as sub: - shelly_call(submitter=sub, environment=sing) + outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) for key in [ "stdout", "return_code", ]: # singularity gives info about cashed image in stderr - assert env_res[key] == shelly_call.result().output.__dict__[key] + assert outputs_dict[key] == attrs_values(outputs)[key] -def create_shelly_inputfile(tempdir, filename, name, executable): +def 
shelly_with_input_factory(filename, executable) -> ShellDef: """creating a task with a simple input_spec""" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - help="files", - argstr="", - ) - ] - - kwargs = {} if filename is None else {"file": filename} - shelly = shell.define( + Shelly = shell.define( executable, - input=inputs, - )(**kwargs) - return shelly + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + help="files", + argstr="", + ) + ], + ) + return Shelly(**({} if filename is None else {"file": filename})) + + +def make_job(task: ShellDef, tempdir: Path, name: str): + return Task( + definition=task, + submitter=Submitter(cache_dir=makedir(tempdir, name)), + name=name, + ) def test_shell_fileinp(tmp_path): """task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + input_dir = makedir(tmp_path, "inputs") filename = input_dir / "file.txt" with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = Native().execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + shelly_job = make_job(shelly, tmp_path, "shelly") + outputs_dict = Native().execute(shelly_job) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = Native() - shelly_env() - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + with Submitter(environment=Native(), cache_dir=newcache("shelly_sub")) as sub: + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - shelly_call(environment=Native()) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=Native(), cache_dir=newcache("shelly_call")) + assert outputs_dict == attrs_values(outputs) def test_shell_fileinp_st(tmp_path): """task (with a splitter) with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + input_dir = makedir(tmp_path, "inputs") filename_1 = input_dir / "file_1.txt" with open(filename_1, "w") as f: @@ -263,28 +240,25 @@ def test_shell_fileinp_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = Native() - shelly_env.split(file=filename) - shelly_env() - assert shelly_env.result()[0].output.stdout.strip() == "hello" - assert shelly_env.result()[1].output.stdout.strip() == "hi" - - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] + shelly = shelly_with_input_factory(filename=None, executable="cat") + with Submitter(environment=Native(), cache_dir=newcache("shelly")) as sub: + results = sub(shelly.split(file=filename)) + assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] + + outputs = shelly.split(file=filename)( + environment=Native(), cache_dir=newcache("shelly_call") ) - shelly_call.split(file=filename) - shelly_call(environment=Native()) - assert shelly_call.result()[0].output.stdout.strip() == "hello" - assert shelly_call.result()[1].output.stdout.strip() == "hi" + assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] @no_win @need_docker def 
test_docker_fileinp(tmp_path): """docker env: task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -292,30 +266,26 @@ def test_docker_fileinp(tmp_path): with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = docker.execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + outputs_dict = docker.execute(shelly) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker - shelly_env() + with Submitter(environment=docker, cache_dir=newcache("shell_sub")) as sub: + results = sub(shelly) - assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - shelly_call(environment=docker) - assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + assert outputs_dict == attrs_values(outputs) @no_win @need_docker def test_docker_fileinp_subm(tmp_path, plugin): """docker env with a submitter: task with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -323,31 +293,30 @@ def test_docker_fileinp_subm(tmp_path, plugin): with open(filename, "w") as f: f.write("hello ") - shelly = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] - ) - env_res = docker.execute(shelly) + shelly = shelly_with_input_factory(filename=filename, executable="cat") + shelly_job = make_job(shelly, tmp_path, "shelly_job") + outputs_dict = docker.execute(shelly_job) - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker + with Submitter( + environment=docker, cache_dir=newcache("shelly_sub"), worker=plugin + ) as sub: + results = sub(shelly) with Submitter(worker=plugin) as sub: - shelly_env(submitter=sub) - assert env_res == shelly_env.result().output.__dict__ + results = sub(shelly) + assert outputs_dict == attrs_values(results.outputs) - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] - ) - with Submitter(worker=plugin) as sub: - shelly_call(submitter=sub, environment=docker) - assert env_res == shelly_call.result().output.__dict__ + outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + assert outputs_dict == attrs_values(outputs) @no_win @need_docker def test_docker_fileinp_st(tmp_path): """docker env: task (with a splitter) with a file in the command/input""" + + def newcache(x): + return makedir(tmp_path, x) + docker = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -361,54 +330,50 @@ def test_docker_fileinp_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] - ) - shelly_env.environment = docker - shelly_env.split(file=filename) - shelly_env() - assert shelly_env.result()[0].output.stdout.strip() == "hello" - assert 
shelly_env.result()[1].output.stdout.strip() == "hi" - - shelly_call = create_shelly_inputfile( - tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] - ) - shelly_call.split(file=filename) - shelly_call(environment=docker) - assert shelly_call.result()[0].output.stdout.strip() == "hello" - assert shelly_call.result()[1].output.stdout.strip() == "hi" + shelly = shelly_with_input_factory(filename=None, executable="cat") + with Submitter(environment=docker, cache_dir=newcache("shelly_sub")) as sub: + results = sub(shelly.split(file=filename)) -def create_shelly_outputfile(tempdir, filename, name, executable="cp"): - """creating a task with an input_spec that contains a template""" - my_input_spec = [ - shell.arg( - name="file_orig", - type=File, - position=2, - help="new file", - argstr="", - ), - shell.arg( - name="file_copy", - type=str, - output_file_template="{file_orig}_copy", - help="output file", - argstr="", - ), - ] + assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] - kwargs = {} if filename is None else {"file_orig": filename} - shelly = shell.define(executable)( - cache_dir=makedir(tempdir, name), - input_spec=my_input_spec, - **kwargs, + outputs = shelly.split(file=filename)( + environment=docker, cache_dir=newcache("shelly_call") ) - return shelly + assert [s.strip for s in outputs.stdout] == ["hello", "hi"] + + +def shelly_outputfile_factory(filename, executable="cp"): + """creating a task with an input_spec that contains a template""" + Shelly = shell.define( + executable, + inputs=[ + shell.arg( + name="file_orig", + type=File, + position=2, + help="new file", + argstr="", + ), + shell.arg( + name="file_copy", + type=str, + output_file_template="{file_orig}_copy", + help="output file", + argstr="", + ), + ], + ) + + return Shelly(**({} if filename is None else {"file_orig": filename})) def test_shell_fileout(tmp_path): """task with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + input_dir = makedir(tmp_path, "inputs") filename = input_dir / "file.txt" with open(filename, "w") as f: @@ -417,30 +382,27 @@ def test_shell_fileout(tmp_path): # execute does not create the cashedir, so this part will fail, # but I guess we don't want to use it this way anyway # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="shelly") - # env_res = Native().execute(shelly) + # outputs_dict = Native().execute(shelly) - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_env" - ) - shelly_env.environment = Native() - shelly_env() - assert ( - Path(shelly_env.result().output.file_copy) - == shelly_env.output_dir / "file_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=filename) - shelly_call = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_call" - ) - shelly_call(environment=Native()) + with Submitter(environment=Native(), cache_dir=newcache("shelly_sub")) as sub: + result = sub(shelly) + assert Path(result.outputs.file_copy) == result.output_dir / "file_copy.txt" + + outputs = shelly(environment=Native(), cache_dir=newcache("shelly_call")) assert ( - Path(shelly_call.result().output.file_copy) - == shelly_call.output_dir / "file_copy.txt" + Path(outputs.file_copy) + == newcache("shelly_call") / shelly._checksum / "file_copy.txt" ) def test_shell_fileout_st(tmp_path): """task (with a splitter) with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + input_dir = makedir(tmp_path, "inputs") 
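A minimal standalone sketch of the call pattern these converted tests exercise, using the same `shell.define`/`.split()` API shown in the surrounding hunks; the executable, file names and cache path below are illustrative assumptions, not taken from the patch:

    from fileformats.generic import File
    from pydra.design import shell
    from pydra.engine.submitter import Submitter

    Cat = shell.define(
        "cat",
        inputs=[shell.arg(name="file", type=File, position=1, argstr="", help="input file")],
    )
    cat = Cat()
    with Submitter(cache_dir="/tmp/pydra-cache") as sub:
        # .split() fans the task out over the two files; list-valued outputs
        # (results.outputs.stdout here) come back in the same order
        results = sub(cat.split(file=["a.txt", "b.txt"]))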
filename_1 = input_dir / "file_1.txt" with open(filename_1, "w") as f: @@ -452,40 +414,44 @@ def test_shell_fileout_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_env" - ) - shelly_env.environment = Native() - shelly_env.split(file_orig=filename) - shelly_env() - assert ( - Path(shelly_env.result()[0].output.file_copy) - == shelly_env.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_env.result()[1].output.file_copy) - == shelly_env.output_dir[1] / "file_2_copy.txt" + shelly = shelly_outputfile_factory( + tempdir=tmp_path, filename=None, name="shelly_sub" ) + with Submitter(environment=Native(), cache_dir=newcache("shelly")) as sub: + results = sub(shelly.split(file_orig=filename)) - shelly_call = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_call" - ) - shelly_call.split(file_orig=filename) - shelly_call(environment=Native()) - assert ( - Path(shelly_call.result()[0].output.file_copy) - == shelly_call.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_call.result()[1].output.file_copy) - == shelly_call.output_dir[1] / "file_2_copy.txt" + assert results.outputs.file_copy == [ + File(results.output_dir / "file_1_copy.txt"), + File(results.output_dir / "file_2_copy.txt"), + ] + + call_cache = newcache("shelly_call") + + outputs = shelly.split(file_orig=filename)( + environment=Native(), cache_dir=call_cache ) + assert outputs.file_copy == [ + File( + call_cache + / attrs.evolve(shelly, file_orig=filename_1)._checksum + / "file_1_copy.txt" + ), + File( + call_cache + / attrs.evolve(shelly, file_orig=filename_1)._checksum + / "file_1_copy.txt" + ), + ] @no_win @need_docker def test_docker_fileout(tmp_path): """docker env: task with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + docker_env = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -493,14 +459,14 @@ def test_docker_fileout(tmp_path): with open(filename, "w") as f: f.write("hello ") - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=filename, name="shelly_env" + shelly_sub = shelly_outputfile_factory( + tempdir=tmp_path, filename=filename, name="shelly_sub" ) - shelly_env.environment = docker_env - shelly_env() + shelly_sub.environment = docker_env + shelly_sub() assert ( - Path(shelly_env.result().output.file_copy) - == shelly_env.output_dir / "file_copy.txt" + Path(shelly_sub.result().output.file_copy) + == shelly_sub.output_dir / "file_copy.txt" ) @@ -508,6 +474,10 @@ def test_docker_fileout(tmp_path): @need_docker def test_docker_fileout_st(tmp_path): """docker env: task (with a splitter) with a file in the output""" + + def newcache(x): + return Path(makedir(tmp_path, x)) + docker_env = Docker(image="busybox") input_dir = makedir(tmp_path, "inputs") @@ -521,17 +491,19 @@ def test_docker_fileout_st(tmp_path): filename = [filename_1, filename_2] - shelly_env = create_shelly_outputfile( - tempdir=tmp_path, filename=None, name="shelly_env" - ) - shelly_env.environment = docker_env - shelly_env.split(file_orig=filename) - shelly_env() - assert ( - Path(shelly_env.result()[0].output.file_copy) - == shelly_env.output_dir[0] / "file_1_copy.txt" - ) - assert ( - Path(shelly_env.result()[1].output.file_copy) - == shelly_env.output_dir[1] / "file_2_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=None) + + with Submitter(environment=docker_env, cache_dir=newcache("shelly_sub")) as sub: + results = 
sub(shelly.split(file_orig=filename)) + assert results.outputs.file_copy == [ + File( + results.output_dir + / attrs.evolve(shelly, file_orig=filename_1)._checksum + / "file_1_copy.txt" + ), + File( + results.output_dir + / attrs.evolve(shelly, file_orig=filename_2)._checksum + / "file_2_copy.txt" + ), + ] From 8c7fc19152e2e96d0b84a239135e44f59b73372f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:11:39 +1100 Subject: [PATCH 244/342] debugged test_environments (except singularity tests) --- pydra/design/shell.py | 7 ++- pydra/engine/environments.py | 28 ++++----- pydra/engine/helpers_file.py | 16 +++-- pydra/engine/specs.py | 5 +- pydra/engine/tests/test_environments.py | 82 +++++++++---------------- pydra/utils/typing.py | 25 +++++++- 6 files changed, 86 insertions(+), 77 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 01ab0be45e..591f297dfb 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -228,10 +228,15 @@ def _validate_path_template(self, attribute, value): f"path_template ({value!r}) can only be provided when no default " f"({self.default!r}) is provided" ) + if value and not is_fileset_or_union(self.type): + raise ValueError( + f"path_template ({value!r}) can only be provided when type is a FileSet, " + f"or union thereof, not {self.type!r}" + ) @keep_extension.validator def _validate_keep_extension(self, attribute, value): - if value and self.path_template is not None: + if value and self.path_template is None: raise ValueError( f"keep_extension ({value!r}) can only be provided when path_template " f"is provided" diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 0cec18895a..e28bef0596 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -10,7 +10,6 @@ if ty.TYPE_CHECKING: from pydra.engine.core import Task from pydra.engine.specs import ShellDef - from pydra.design import shell class Environment: @@ -94,7 +93,7 @@ def bind(self, loc, mode="ro"): return f"{loc_abs}:{self.root}{loc_abs}:{mode}" def _get_bindings( - self, definition: "ShellDef", root: str | None = None + self, task: "Task", root: str | None = None ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: """Return bindings necessary to run task in an alternative root. @@ -111,27 +110,32 @@ def _get_bindings( bindings: dict Mapping from paths in the host environment to the target environment """ + from pydra.design import shell + bindings: dict[str, tuple[str, str]] = {} input_updates: dict[str, tuple[Path, ...]] = {} if root is None: return bindings fld: shell.arg - for fld in list_fields(definition): + for fld in list_fields(task.definition): if TypeParser.contains_type(FileSet, fld.type): - fileset: FileSet | None = definition[fld.name] - if fileset is None: + fileset: FileSet | None = task.inputs[fld.name] + if not fileset: continue if not isinstance(fileset, (os.PathLike, FileSet)): raise NotImplementedError( - "Generating environment bindings for nested FileSets is not " - "supported yet" + f"No support for generating bindings for {type(fileset)} types " + f"({fileset})" ) copy = fld.copy_mode == FileSet.CopyMode.copy host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") # Default to mounting paths as read-only, but respect existing modes - bindings[host_path] = (env_path, "rw" if copy else "ro") + bindings[host_path] = ( + env_path, + "rw" if copy or isinstance(fld, shell.outarg) else "ro", + ) # Provide updated in-container paths to the command to be run. 
If a # fs-object, which resolves to a single path, just pass in the name of @@ -152,9 +156,7 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self._get_bindings( - definition=task.definition, root=self.root - ) + mounts, input_updates = self._get_bindings(task=task, root=self.root) docker_args = [ "docker", @@ -193,9 +195,7 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self._get_bindings( - definition=task.definition, root=self.root - ) + mounts, input_updates = self._get_bindings(task=task, root=self.root) # todo adding xargsy etc singularity_args = [ diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index f942674747..5e55518e53 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -157,7 +157,7 @@ def template_update_single( input_values: dict[str, ty.Any] = None, output_dir: Path | None = None, spec_type: str = "input", -): +) -> Path | None: """Update a single template from the input_spec or output_spec based on the value from inputs_dict (checking the types of the fields, that have "output_file_template)" @@ -198,9 +198,9 @@ def template_update_single( if output_dir and value is not None: # should be converted to str, it is also used for input fields that should be str if type(value) is list: - return [str(output_dir / Path(val).name) for val in value] + return [output_dir / val.name for val in value] else: - return str(output_dir / Path(value).name) + return output_dir / value.name else: return None @@ -243,7 +243,7 @@ def _template_formatting(field, definition, input_values): formatted = _string_template_formatting( field, template, definition, input_values ) - return formatted + return Path(formatted) def _string_template_formatting(field, template, definition, input_values): @@ -252,6 +252,14 @@ def _string_template_formatting(field, template, definition, input_values): inp_fields = re.findall(r"{\w+}", template) inp_fields_fl = re.findall(r"{\w+:[0-9.]+f}", template) inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_fl] + + # FIXME: This would be a better solution, and would allow you to explicitly specify + # whether you want to use the extension of the input file or not, by referencing + # the "ext" attribute of the input file. However, this would require a change in the + # way the element formatting is done + # + # inp_fields = set(re.findall(r"{(\w+)(?:\.\w+)?(?::[0-9.]+f)?}", template)) + if len(inp_fields) == 0: return template diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index cd82ab96d5..6b154dc23e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -521,7 +521,7 @@ def _check_rules(self): if is_lazy(value): continue - if value is attrs.NOTHING: + if value is attrs.NOTHING and not getattr(field, "path_template", False): errors.append(f"Mandatory field {field.name!r} is not set") # Collect alternative fields associated with this field. 
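A minimal sketch of the behaviour this `_check_rules` change (together with the `path_template` validators added above) is aiming at, written against the same `shell.define` API used in the test updates below; the executable and field names are assumptions for illustration only:

    from fileformats.generic import File
    from pydra.design import shell

    Cp = shell.define(
        "cp",
        inputs=[
            shell.arg(name="in_file", type=File, position=1, argstr="", help="file to copy"),
        ],
        outputs=[
            # path_template is only allowed on FileSet-typed fields, and
            # keep_extension only alongside a path_template (per the validators above)
            shell.outarg(
                name="out_file",
                type=File,
                path_template="{in_file}_copy",
                keep_extension=True,
                position=2,
                argstr="",
                help="copied file",
            ),
        ],
    )
    # Because 'out_file' carries a path_template, leaving it unset no longer
    # raises "Mandatory field 'out_file' is not set"; only 'in_file' must be given.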
@@ -1043,10 +1043,9 @@ def _command_args( output_dir = Path.cwd() self._check_resolved() inputs = attrs_values(self) - modified_inputs = template_update(self, output_dir=output_dir) + inputs.update(template_update(self, output_dir=output_dir)) if input_updates: inputs.update(input_updates) - inputs.update(modified_inputs) pos_args = [] # list for (position, command arg) positions_provided = [] for field in list_fields(self): diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index fefb9b1c83..305c0cc844 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -43,7 +43,7 @@ def newcache(x): outputs = shelly(cache_dir=newcache("shelly-exec")) assert drop_stderr(env_outputs) == drop_stderr(attrs_values(outputs)) - outputs = shelly(environment=Native()) + outputs = shelly(environment=Native(), cache_dir=newcache("shelly-call")) assert env_outputs == attrs_values(outputs) with Submitter(cache_dir=newcache("shelly-submitter"), environment=Native()) as sub: @@ -59,11 +59,11 @@ def test_docker_1(tmp_path): def newcache(x): makedir(tmp_path, x) - cmd = ["whoami"] + cmd = "whoami" docker = Docker(image="busybox") Shelly = shell.define(cmd) shelly = Shelly() - assert shelly.cmdline == " ".join(cmd) + assert shelly.cmdline == cmd shelly_job = Task( definition=shelly, @@ -267,7 +267,7 @@ def newcache(x): f.write("hello ") shelly = shelly_with_input_factory(filename=filename, executable="cat") - outputs_dict = docker.execute(shelly) + outputs_dict = docker.execute(make_job(shelly, tmp_path, "shelly")) with Submitter(environment=docker, cache_dir=newcache("shell_sub")) as sub: results = sub(shelly) @@ -340,7 +340,7 @@ def newcache(x): outputs = shelly.split(file=filename)( environment=docker, cache_dir=newcache("shelly_call") ) - assert [s.strip for s in outputs.stdout] == ["hello", "hi"] + assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] def shelly_outputfile_factory(filename, executable="cp"): @@ -351,16 +351,20 @@ def shelly_outputfile_factory(filename, executable="cp"): shell.arg( name="file_orig", type=File, - position=2, + position=1, help="new file", argstr="", ), - shell.arg( + ], + outputs=[ + shell.outarg( name="file_copy", - type=str, - output_file_template="{file_orig}_copy", + type=File, + path_template="{file_orig}_copy", help="output file", argstr="", + position=2, + keep_extension=True, ), ], ) @@ -390,11 +394,10 @@ def newcache(x): result = sub(shelly) assert Path(result.outputs.file_copy) == result.output_dir / "file_copy.txt" - outputs = shelly(environment=Native(), cache_dir=newcache("shelly_call")) - assert ( - Path(outputs.file_copy) - == newcache("shelly_call") / shelly._checksum / "file_copy.txt" - ) + call_cache = newcache("shelly_call") + + outputs = shelly(environment=Native(), cache_dir=call_cache) + assert Path(outputs.file_copy) == call_cache / shelly._checksum / "file_copy.txt" def test_shell_fileout_st(tmp_path): @@ -414,15 +417,13 @@ def newcache(x): filename = [filename_1, filename_2] - shelly = shelly_outputfile_factory( - tempdir=tmp_path, filename=None, name="shelly_sub" - ) + shelly = shelly_outputfile_factory(filename=None) with Submitter(environment=Native(), cache_dir=newcache("shelly")) as sub: results = sub(shelly.split(file_orig=filename)) - assert results.outputs.file_copy == [ - File(results.output_dir / "file_1_copy.txt"), - File(results.output_dir / "file_2_copy.txt"), + assert [f.name for f in results.outputs.file_copy] == [ + "file_1_copy.txt", + 
"file_2_copy.txt", ] call_cache = newcache("shelly_call") @@ -430,18 +431,7 @@ def newcache(x): outputs = shelly.split(file_orig=filename)( environment=Native(), cache_dir=call_cache ) - assert outputs.file_copy == [ - File( - call_cache - / attrs.evolve(shelly, file_orig=filename_1)._checksum - / "file_1_copy.txt" - ), - File( - call_cache - / attrs.evolve(shelly, file_orig=filename_1)._checksum - / "file_1_copy.txt" - ), - ] + assert [f.name for f in outputs.file_copy] == ["file_1_copy.txt", "file_2_copy.txt"] @no_win @@ -459,15 +449,11 @@ def newcache(x): with open(filename, "w") as f: f.write("hello ") - shelly_sub = shelly_outputfile_factory( - tempdir=tmp_path, filename=filename, name="shelly_sub" - ) - shelly_sub.environment = docker_env - shelly_sub() - assert ( - Path(shelly_sub.result().output.file_copy) - == shelly_sub.output_dir / "file_copy.txt" - ) + shelly = shelly_outputfile_factory(filename=filename) + + with Submitter(environment=docker_env, cache_dir=newcache("shelly")) as sub: + results = sub(shelly) + assert results.outputs.file_copy == File(results.output_dir / "file_copy.txt") @no_win @@ -495,15 +481,7 @@ def newcache(x): with Submitter(environment=docker_env, cache_dir=newcache("shelly_sub")) as sub: results = sub(shelly.split(file_orig=filename)) - assert results.outputs.file_copy == [ - File( - results.output_dir - / attrs.evolve(shelly, file_orig=filename_1)._checksum - / "file_1_copy.txt" - ), - File( - results.output_dir - / attrs.evolve(shelly, file_orig=filename_2)._checksum - / "file_2_copy.txt" - ), + assert [f.name for f in results.outputs.file_copy] == [ + "file_1_copy.txt", + "file_2_copy.txt", ] diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index cb4e46311c..7e39fc541b 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1058,10 +1058,29 @@ def optional_type(type_: type) -> type: return type_ -def is_fileset_or_union(type_: type) -> bool: - """Check if the type is a FileSet or a Union containing a FileSet""" +def is_fileset_or_union(type_: type, allow_none: bool | None = None) -> bool: + """Check if the type is a FileSet or a Union containing a FileSet + + Parameters + ---------- + type_ : type + the type to check + allow_none : bool, optional + whether to allow None as a valid type, by default None. 
If None, then None + is not allowed at the outer layer, but is allowed within a Union + + Returns + ------- + is_fileset : bool + whether the type is a FileSet or a Union containing a FileSet + """ + if type_ is None and allow_none: + return True if is_union(type_): - return any(is_fileset_or_union(t) for t in ty.get_args(type_)) + return any( + is_fileset_or_union(t, allow_none=allow_none or allow_none is None) + for t in ty.get_args(type_) + ) elif not inspect.isclass(type_): return False return issubclass(type_, core.FileSet) From 3418b0d9fbda686da1ad49c2d4709908f0bf4561 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:15:47 +1100 Subject: [PATCH 245/342] updated singularity tests --- pydra/engine/tests/test_environments.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 305c0cc844..f67b6a6273 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -125,7 +125,7 @@ def test_singularity_1(tmp_path): def newcache(x): makedir(tmp_path, x) - cmd = ["whoami"] + cmd = "whoami" sing = Singularity(image="docker://alpine") shell_def = shell.define(cmd) shelly = Task( @@ -133,7 +133,7 @@ def newcache(x): submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == " ".join(cmd) + assert shell_def.cmdline == cmd outputs_dict = sing.execute(shelly) with Submitter(cache_dir=newcache("shelly_sub"), environment=sing) as sub: @@ -152,7 +152,7 @@ def test_singularity_1_subm(tmp_path, plugin): def newcache(x): makedir(tmp_path, x) - cmd = ["whoami"] + cmd = "whoami" sing = Singularity(image="docker://alpine") shell_def = shell.define(cmd) shelly = Task( @@ -160,7 +160,7 @@ def newcache(x): submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == " ".join(cmd) + assert shell_def.cmdline == cmd outputs_dict = sing.execute(shelly) with Submitter(worker=plugin) as sub: From a67968b64167172d60715f7e86031090b9493fda Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:21:35 +1100 Subject: [PATCH 246/342] updated __version__ location to inside pydra.engine --- .github/workflows/testpsijslurm.yml | 2 +- .github/workflows/testpydra.yml | 2 +- .github/workflows/testslurm.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml index 57d4e07781..ea661b33bc 100644 --- a/.github/workflows/testpsijslurm.yml +++ b/.github/workflows/testpsijslurm.yml @@ -48,7 +48,7 @@ jobs: docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs -n auto --psij=slurm --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 
f9f7229a10..c06da2e29b 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -109,7 +109,7 @@ jobs: run: pip install $ARCHIVE - name: Print version - run: python -c "import pydra; print(pydra.__version__)" + run: python -c "import pydra; print(pydra.engine.__version__)" - name: Install Pydra tests dependencies run: pip install pydra[test] diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index 62c1e43792..bac1d9ec5e 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -48,7 +48,7 @@ jobs: docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra; print(pydra.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" From ac80cbf8bbf6bd10f70b8c7b17364c37149c65af Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:27:03 +1100 Subject: [PATCH 247/342] updated github actions version tests --- .github/workflows/testpsijslurm.yml | 2 +- .github/workflows/testpydra.yml | 2 +- .github/workflows/testslurm.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml index ea661b33bc..cce4d9c242 100644 --- a/.github/workflows/testpsijslurm.yml +++ b/.github/workflows/testpsijslurm.yml @@ -48,7 +48,7 @@ jobs: docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.engine.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra.engine; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs -n auto --psij=slurm --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index c06da2e29b..702c0f03a7 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -109,7 +109,7 @@ jobs: run: pip install $ARCHIVE - name: Print version - run: python -c "import pydra; print(pydra.engine.__version__)" + run: python -c "import pydra.engine; print(pydra.engine.__version__)" - name: Install Pydra tests dependencies run: pip install pydra[test] diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index bac1d9ec5e..1eaeb53d57 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -48,7 +48,7 @@ jobs: docker exec slurm bash -c 
"CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" fi docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" - docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra; print(pydra.engine.__version__)'" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra.engine; print(pydra.engine.__version__)'" - name: Run pytest run: | docker exec slurm bash -c "pytest --color=yes -vs --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" From 286a8cbea5e1c8a078284a12b6aa1cdcd0770cff Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:39:24 +1100 Subject: [PATCH 248/342] dropped --pyargs flag in CI test as it doesn't work with namespace packages --- .github/workflows/testpydra.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 702c0f03a7..6bb4c0216d 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -119,7 +119,7 @@ jobs: - name: Pytest run: | - pytest -vs -n auto --doctest-modules --pyargs pydra \ + pytest -vs -n auto --doctest-modules \ --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml - name: Upload to codecov From c0bd52ca7c7bb3931052c60d73ff46353231154a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 11:45:51 +1100 Subject: [PATCH 249/342] streamlined test matrices --- .github/workflows/testpydra.yml | 44 ++------------------------------- 1 file changed, 2 insertions(+), 42 deletions(-) diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 6bb4c0216d..c1dfb352cc 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -52,68 +52,28 @@ jobs: matrix: os: [macos-latest, ubuntu-latest, windows-latest] python-version: ['3.11', '3.12', '3.13'] - install: ['wheel'] - include: - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'sdist' - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'repo' - - os: 'ubuntu-latest' - python-version: '3.11' - install: 'archive' fail-fast: false runs-on: ${{ matrix.os }} steps: - - name: Fetch sdist/wheel - uses: actions/download-artifact@v4 - if: matrix.install == 'sdist' || matrix.install == 'wheel' - with: - name: dist - path: dist/ - - name: Fetch git archive - uses: actions/download-artifact@v4 - if: matrix.install == 'archive' - with: - name: archive - path: archive/ - name: Fetch repository uses: actions/checkout@v4 - if: matrix.install == 'repo' - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Update pip run: python -m pip install --upgrade pip - - name: Determine installation target - run: | - if [[ "$INSTALL" = "sdist" ]]; then - echo "ARCHIVE=$( ls dist/*.tar.gz )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "wheel" ]]; then - echo "ARCHIVE=$( ls dist/*.whl )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "archive" ]]; then - echo "ARCHIVE=$( ls archive/*.zip )" >> $GITHUB_ENV - elif [[ "$INSTALL" = "repo" ]]; then - echo "ARCHIVE=." 
>> $GITHUB_ENV - fi - env: - INSTALL: ${{ matrix.install }} - - name: Install Pydra - run: pip install $ARCHIVE + run: pip install .[test] - name: Print version run: python -c "import pydra.engine; print(pydra.engine.__version__)" - - name: Install Pydra tests dependencies - run: pip install pydra[test] - - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV From 63b31b5bc96c04445c3eb4011f596bd9ab64e2b9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 12:47:21 +1100 Subject: [PATCH 250/342] reverted ubuntu version to 22.04 for singularity tests --- .github/workflows/testsingularity.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index edcfb7c948..5ffea95168 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -14,7 +14,7 @@ concurrency: jobs: build: name: Build - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: matrix: python-version: ['3.11', '3.12', '3.13'] From a7876a341c75684d3a81b92d67768e9cceecfaa4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 12:51:51 +1100 Subject: [PATCH 251/342] added test_environments to singularity test --- .github/workflows/testsingularity.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 5ffea95168..5b43fdd771 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -70,6 +70,6 @@ jobs: - name: Pytest - run: pytest -vs --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml pydra/engine/tests/test_singularity.py + run: pytest -vs --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml pydra/engine/tests/test_singularity.py pydra/engine/tests/test_environments.py - name: Upload to codecov run: codecov -f cov.xml -F unittests -e GITHUB_WORKFLOW From 41d9d21d3859c503db353f85c33e5774cf0ba3b5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 12:59:52 +1100 Subject: [PATCH 252/342] fixed up singularity tests --- pydra/engine/tests/test_environments.py | 34 +++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index f67b6a6273..63b50343ea 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,6 +1,5 @@ from pathlib import Path import typing as ty -import attrs from ..environments import Native, Docker, Singularity from ..submitter import Submitter from fileformats.generic import File @@ -77,7 +76,10 @@ def newcache(x): assert attrs_values(result.outputs) == outputs_dict outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) - assert outputs_dict == attrs_values(outputs) + # If busybox isn't found locally, then the stderr will have the download progress from + # the Docker auto-pull in it + for key in ["stdout", "return_code"]: + assert outputs_dict[key] == attrs_values(outputs)[key] @no_win @@ -127,14 +129,15 @@ def newcache(x): cmd = "whoami" sing = Singularity(image="docker://alpine") - shell_def = shell.define(cmd) - shelly = Task( - definition=shell_def, + Shelly = shell.define(cmd) + shelly = Shelly() + shelly_job = Task( + definition=shelly, submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == cmd - outputs_dict = sing.execute(shelly) + assert shelly.cmdline == cmd + 
outputs_dict = sing.execute(shelly_job) with Submitter(cache_dir=newcache("shelly_sub"), environment=sing) as sub: results = sub(shelly) @@ -154,24 +157,23 @@ def newcache(x): cmd = "whoami" sing = Singularity(image="docker://alpine") - shell_def = shell.define(cmd) - shelly = Task( - definition=shell_def, + Shelly = shell.define(cmd) + shelly = Shelly() + shelly_job = Task( + definition=shelly, submitter=Submitter(cache_dir=newcache("shelly")), name="shelly", ) - assert shell_def.cmdline == cmd - outputs_dict = sing.execute(shelly) + assert shelly.cmdline == cmd + outputs_dict = sing.execute(shelly_job) with Submitter(worker=plugin) as sub: results = sub(shelly) assert outputs_dict == attrs_values(results.outputs) outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) - for key in [ - "stdout", - "return_code", - ]: # singularity gives info about cashed image in stderr + # singularity gives info about cashed image in stderr + for key in ["stdout", "return_code"]: assert outputs_dict[key] == attrs_values(outputs)[key] From bd2b158e2ebf05ef4c26380a3dd0eca9e39c3cab Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 13:13:54 +1100 Subject: [PATCH 253/342] started implementing singularity tests --- pydra/engine/tests/test_singularity.py | 96 +++++++++++--------------- 1 file changed, 41 insertions(+), 55 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 8c21d44289..baa5bed508 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -2,10 +2,9 @@ import subprocess as sp import pytest import attr - -from ..task import ShellDef from ..submitter import Submitter -from ..specs import ShellOutputs, ShellDef +from ..specs import ShellOutputs +from pydra.design import shell from fileformats.generic import File from ..environments import Singularity @@ -30,19 +29,11 @@ def test_singularity_1_nosubm(tmp_path): """ cmd = "pwd" image = "docker://alpine" - singu = ShellDef( - name="singu", - executable=cmd, - environment=Singularity(image=image), - cache_dir=tmp_path, - ) - assert singu.environment.image == "docker://alpine" - assert isinstance(singu.environment, Singularity) - assert singu.cmdline == cmd - - res = singu() - assert "/mnt/pydra" in res.output.stdout - assert res.output.return_code == 0 + Singu = shell.define(cmd) + singu = Singu() + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert "/mnt/pydra" in outputs.stdout + assert outputs.return_code == 0 @need_singularity @@ -52,17 +43,16 @@ def test_singularity_2_nosubm(tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = ShellDef( - name="singu", - executable=cmd, - environment=Singularity(image=image), - cache_dir=tmp_path, - ) + Singu = shell.define(" ".join(cmd)) + singu = Singu() assert singu.cmdline == " ".join(cmd) - res = singu() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 + outputs = singu( + Singularity(image=image), + cache_dir=tmp_path, + ) + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 @need_singularity @@ -72,20 +62,17 @@ def test_singularity_2(plugin, tmp_path): """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" + Singu = shell.define(" ".join(cmd)) + singu = Singu() - singu = ShellDef( - name="singu", - executable=cmd, - environment=Singularity(image=image), - cache_dir=tmp_path, - ) assert singu.cmdline == " ".join(cmd) - with 
Submitter(worker=plugin) as sub: - singu(submitter=sub) - res = singu.result() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 + with Submitter( + worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path + ) as sub: + res = sub(singu) + assert res.outputs.stdout.strip() == " ".join(cmd[1:]) + assert res.outputs.return_code == 0 @need_singularity @@ -97,20 +84,19 @@ def test_singularity_2a(plugin, tmp_path): cmd_args = ["hail", "pydra"] # separate command into exec + args image = "docker://alpine" - singu = ShellDef( - name="singu", - executable=cmd_exec, - args=cmd_args, + Singu = shell.define(cmd_exec) + singu = Singu(additional_args=cmd_args) + assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" + + with Submitter( + worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path, - ) - assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" + ) as sub: + res = sub(singu) - with Submitter(worker=plugin) as sub: - singu(submitter=sub) - res = singu.result() - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 + assert res.outputs.stdout.strip() == " ".join(cmd_args) + assert res.outputs.return_code == 0 # tests with State @@ -123,15 +109,15 @@ def test_singularity_st_1(plugin, tmp_path): """ cmd = ["pwd", "ls"] image = "docker://alpine" - singu = ShellDef( - name="singu", environment=Singularity(image=image), cache_dir=tmp_path - ).split("executable", executable=cmd) - assert singu.state.splitter == "singu.executable" + Singu = shell.define("dummy") + singu = Singu().split("executable", executable=cmd) - res = singu(plugin=plugin) - assert "/mnt/pydra" in res[0].output.stdout - assert res[1].output.stdout == "" - assert res[0].output.return_code == res[1].output.return_code == 0 + outputs = singu( + plugin=plugin, environment=Singularity(image=image), cache_dir=tmp_path + ) + assert outputs.stdout[0] == "/mnt/pydra" + assert outputs.stdout[1] == "" + assert outputs.return_code == [0, 0] @need_singularity From e680452600900d0f8cd402aecc12155d92240e00 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:08:07 +1100 Subject: [PATCH 254/342] reworked test_singularity for new syntax --- pydra/engine/tests/test_singularity.py | 594 +++++++++---------------- 1 file changed, 215 insertions(+), 379 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index baa5bed508..31f061750a 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -1,10 +1,8 @@ import shutil import subprocess as sp import pytest -import attr from ..submitter import Submitter -from ..specs import ShellOutputs -from pydra.design import shell +from pydra.design import shell, workflow from fileformats.generic import File from ..environments import Singularity @@ -49,7 +47,6 @@ def test_singularity_2_nosubm(tmp_path): outputs = singu( Singularity(image=image), - cache_dir=tmp_path, ) assert outputs.stdout.strip() == " ".join(cmd[1:]) assert outputs.return_code == 0 @@ -90,8 +87,6 @@ def test_singularity_2a(plugin, tmp_path): with Submitter( worker=plugin, - environment=Singularity(image=image), - cache_dir=tmp_path, ) as sub: res = sub(singu) @@ -131,17 +126,16 @@ def test_singularity_st_2(tmp_path, n): """splitter over args (checking bigger splitters if slurm available)""" args_n = list(range(n)) image = "docker://alpine" - singu = ShellDef( - name="singu", - executable="echo", - 
environment=Singularity(image=image), - cache_dir=tmp_path, - ).split("args", args=args_n) - assert singu.state.splitter == "singu.args" - res = singu(plugin="slurm") - assert "1" in res[1].output.stdout - assert str(n - 1) in res[-1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 + Singu = shell.define("echo") + singu = Singu().split("args", args=args_n) + with Submitter( + plugin="slurm", + ) as sub: + res = sub(singu) + + assert "1" in res.outputs.stdout[1] + assert str(n - 1) in res.outputs.stdout[-1] + assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 # tests with customized output_spec @@ -156,25 +150,19 @@ def test_singularity_outputspec_1(plugin, tmp_path): cmd = ["touch", "newfile_tmp.txt"] image = "docker://alpine" - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutputs,), - ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - output_spec=my_output_spec, - cache_dir=tmp_path, + Singu = shell.define( + " ".join(cmd), + inputs=[shell.arg(name="newfile", type=File, path_template="newfile_tmp.txt")], ) + singu = Singu() - with Submitter(worker=plugin) as sub: - singu(submitter=sub) + with Submitter( + worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path + ) as sub: + res = sub(singu) - res = singu.result() - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() # tests with customised input_spec @@ -190,37 +178,24 @@ def test_singularity_inputspec_1(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file=filename, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file=filename) - res = singu() - assert res.output.stdout == "hello from pydra" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout.strip() == "hello from pydra" @need_singularity @@ -235,32 +210,23 @@ def test_singularity_inputspec_1a(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - default=filename, - metadata={"position": 1, "argstr": "", "help": "input file"}, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + default=filename, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) + singu = Singu(file=filename) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) - - res = singu() - assert res.output.stdout == "hello from pydra" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout.strip() == "hello from pydra" @need_singularity @@ -277,48 +243,31 @@ def test_singularity_inputspec_2(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - 
fields=[ - ( - "file1", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - default=filename_2, - metadata={ - "position": 2, - "argstr": "", - "help": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + default=filename_2, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file1=filename_1, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file1=filename_1) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity @@ -337,47 +286,30 @@ def test_singularity_inputspec_2a_except(plugin, tmp_path): image = "docker://alpine" # the field with default value can't be before value without default - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file2=filename_2, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + singu = Singu(file2=filename_2) + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity @@ -397,48 +329,31 @@ def test_singularity_inputspec_2a(plugin, tmp_path): image = "docker://alpine" # if you want set default in the first field you can use default_value in metadata - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - attr.ib( - type=File, - default=filename_1, - metadata={ - "position": 1, - "argstr": "", - "help": "input file 1", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", ), - ( - "file2", - attr.ib( - type=File, - metadata={ - "position": 2, - "argstr": "", - "help": "input file 2", - }, - ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", ), ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - file2=filename_2, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ) + singu = Singu(file2=filename_2) - res = singu() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra\nhave a nice one" @need_singularity @@ -454,51 +369,35 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, 
tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "orig file", - "mandatory": True, - "copyfile": True, - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="orig_file", + type=File, + position=1, + argstr="", + help="orig file", + mandatory=True, + copyfile=True, ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help": "output file", - }, - ), + shell.arg( + name="out_file", + type=str, + path_template="{orig_file}", + help="output file", ), ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + singu = Singu(orig_file=str(file)) - res = singu() - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == singu.output_dir - with open(res.output.out_file) as f: + assert outputs.out_file.fspath.parent == singu.output_dir + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: @@ -521,37 +420,25 @@ def test_singularity_inputspec_state_1(tmp_path): filename = [str(filename_1), str(filename_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ).split("file", file=filename) + singu = Singu().split("file", file=filename) - res = singu() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout[0] == "hello from pydra" + assert outputs.stdout[1] == "have a nice one" @need_singularity @@ -571,37 +458,25 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=cmd, - input_spec=my_input_spec, - strip=True, - cache_dir=tmp_path, - ).split("file", file=filename) + singu = Singu().split("file", file=filename) - res = singu() - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) + assert outputs.stdout[0] 
== "hello from pydra" + assert outputs.stdout[1] == "have a nice one" @need_singularity @@ -614,46 +489,31 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): cmd = "cat" image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), - ) - - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd - wf.inputs.file = filename - - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, ) - wf.add(singu) - wf.set_output([("out", wf.singu.lzout.stdout)]) + @workflow.define + def Workflow(cmd: str, file: File) -> str: + singu = workflow.add( + Singu(executable=cmd, file=file), environment=Singularity(image=image) + ) + return singu.stdout - with Submitter(worker="serial") as sub: - wf(submitter=sub) + with Submitter(cache_dir=tmp_path) as sub: + res = sub(Workflow(cmd=cmd, file=filename)) - res = wf.result() - assert res.output.out == "hello from pydra" + assert res.outputs.out == "hello from pydra" @need_singularity @@ -670,47 +530,34 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd + @workflow.define + def Workflow(cmd: str, file: File) -> str: + singu = workflow.add( + Singu(executable=cmd, file=file), + environment=Singularity(image=image), + ) + return singu.stdout - singu = ShellDef( - name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - file=wf.lzin.file, - input_spec=my_input_spec, - strip=True, - ) - wf.add(singu) - wf.split("file", file=filename) - - wf.set_output([("out", wf.singu.lzout.stdout)]) + wf = Workflow(cmd=cmd).split("file", file=filename) - with Submitter(worker=plugin) as sub: - wf(submitter=sub) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res = wf.result() - assert res[0].output.out == "hello from pydra" - assert res[1].output.out == "have a nice one" + assert res.outputs.out == ["hello from pydra", "have a nice one"] @need_singularity @@ -727,42 +574,31 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): filename = [str(file_1), str(file_2)] image = "docker://alpine" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help": "input file", - }, - ), + Singu = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + mandatory=True, + position=1, + argstr="", + help="input file", ) ], - bases=(ShellDef,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) - wf.inputs.cmd = cmd - wf.inputs.file = filename - - singu = ShellDef( - 
name="singu", - environment=Singularity(image=image), - executable=wf.lzin.cmd, - input_spec=my_input_spec, - strip=True, - ).split("file", file=wf.lzin.file) - wf.add(singu) + @workflow.define + def Workflow(cmd: str, files: list[File]) -> list[str]: + singu = workflow.add( + Singu(executable=cmd).split(file=files), + environment=Singularity(image=image), + ) + return singu.stdout - wf.set_output([("out", wf.singu.lzout.stdout)]) + wf = Workflow(cmd=cmd, files=filename) - with Submitter(worker=plugin) as sub: - wf(submitter=sub) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == ["hello from pydra", "have a nice one"] + assert res.outputs.out == ["hello from pydra", "have a nice one"] From 77bcafe1ac7c584a1e913297da2bc1cd1e2295bf Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:10:28 +1100 Subject: [PATCH 255/342] fixes to test_singularity tests --- pydra/engine/tests/test_singularity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 31f061750a..651dca21bd 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -86,7 +86,7 @@ def test_singularity_2a(plugin, tmp_path): assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" with Submitter( - worker=plugin, + worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path ) as sub: res = sub(singu) @@ -129,7 +129,7 @@ def test_singularity_st_2(tmp_path, n): Singu = shell.define("echo") singu = Singu().split("args", args=args_n) with Submitter( - plugin="slurm", + plugin="slurm", environment=Singularity(image=image), cache_dir=tmp_path ) as sub: res = sub(singu) From 33e615c2d574c292d675204a859d1c881df7fd2e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:17:31 +1100 Subject: [PATCH 256/342] removed mandatory kwarg in test_singularity shell.defines --- pydra/engine/tests/test_singularity.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 651dca21bd..532e8521a5 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -184,7 +184,6 @@ def test_singularity_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -378,7 +377,6 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): position=1, argstr="", help="orig file", - mandatory=True, copyfile=True, ), shell.arg( @@ -426,7 +424,6 @@ def test_singularity_inputspec_state_1(tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -464,7 +461,6 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -495,7 +491,6 @@ def test_singularity_wf_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -536,7 +531,6 @@ def test_singularity_wf_state_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", @@ -580,7 +574,6 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): shell.arg( name="file", type=File, - mandatory=True, position=1, argstr="", help="input file", From 1be664f1614942fc615e743eb1030f30c1578c6f Mon 
Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:18:41 +1100 Subject: [PATCH 257/342] changed shell.arg to shell.outarg for path_template outputs --- pydra/engine/tests/test_singularity.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 532e8521a5..96471c26a2 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -152,7 +152,9 @@ def test_singularity_outputspec_1(plugin, tmp_path): Singu = shell.define( " ".join(cmd), - inputs=[shell.arg(name="newfile", type=File, path_template="newfile_tmp.txt")], + outputs=[ + shell.outarg(name="newfile", type=File, path_template="newfile_tmp.txt") + ], ) singu = Singu() @@ -379,7 +381,9 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): help="orig file", copyfile=True, ), - shell.arg( + ], + outputs=[ + shell.outarg( name="out_file", type=str, path_template="{orig_file}", From f83b57d0eece589c3b1c5ae5a1da4238a14e7b07 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:22:35 +1100 Subject: [PATCH 258/342] cleaning up more test_environment singularity tests --- pydra/engine/tests/test_environments.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 63b50343ea..68685e5160 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -73,13 +73,16 @@ def newcache(x): with Submitter(cache_dir=newcache("shelly_sub"), environment=docker) as sub: result = sub(shelly) - assert attrs_values(result.outputs) == outputs_dict outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) # If busybox isn't found locally, then the stderr will have the download progress from # the Docker auto-pull in it for key in ["stdout", "return_code"]: - assert outputs_dict[key] == attrs_values(outputs)[key] + assert ( + outputs_dict[key] + == attrs_values(outputs)[key] + == attrs_values(result.outputs)[key] + ) @no_win @@ -167,14 +170,17 @@ def newcache(x): assert shelly.cmdline == cmd outputs_dict = sing.execute(shelly_job) - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=newcache("shelly_sub")) as sub: results = sub(shelly) - assert outputs_dict == attrs_values(results.outputs) outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) # singularity gives info about cashed image in stderr for key in ["stdout", "return_code"]: - assert outputs_dict[key] == attrs_values(outputs)[key] + assert ( + outputs_dict[key] + == attrs_values(outputs)[key] + == attrs_values(results.outputs)[key] + ) def shelly_with_input_factory(filename, executable) -> ShellDef: From 25eed1fd4ff863acf5a2bcfd9dd5c79edff0161e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:25:03 +1100 Subject: [PATCH 259/342] cleaned up singularity unittests --- pydra/engine/tests/test_singularity.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 96471c26a2..b25f4ff131 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -438,8 +438,8 @@ def test_singularity_inputspec_state_1(tmp_path): singu = Singu().split("file", file=filename) outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) - assert 
outputs.stdout[0] == "hello from pydra" - assert outputs.stdout[1] == "have a nice one" + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @need_singularity @@ -475,8 +475,8 @@ def test_singularity_inputspec_state_1b(plugin, tmp_path): singu = Singu().split("file", file=filename) outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) - assert outputs.stdout[0] == "hello from pydra" - assert outputs.stdout[1] == "have a nice one" + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @need_singularity @@ -512,7 +512,7 @@ def Workflow(cmd: str, file: File) -> str: with Submitter(cache_dir=tmp_path) as sub: res = sub(Workflow(cmd=cmd, file=filename)) - assert res.outputs.out == "hello from pydra" + assert res.outputs.out.strip() == "hello from pydra" @need_singularity @@ -555,7 +555,10 @@ def Workflow(cmd: str, file: File) -> str: with Submitter(worker=plugin, cache_dir=tmp_path) as sub: res = sub(wf) - assert res.outputs.out == ["hello from pydra", "have a nice one"] + assert [o.strip() for o in res.outputs.out] == [ + "hello from pydra", + "have a nice one", + ] @need_singularity @@ -598,4 +601,7 @@ def Workflow(cmd: str, files: list[File]) -> list[str]: with Submitter(worker=plugin, cache_dir=tmp_path) as sub: res = sub(wf) - assert res.outputs.out == ["hello from pydra", "have a nice one"] + assert [o.strip() for o in res.outputs.out] == [ + "hello from pydra", + "have a nice one", + ] From e8a6aa0e2535d4f32f99f2fb349514288b67ffe4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:38:18 +1100 Subject: [PATCH 260/342] debugging singularity tests --- pydra/engine/tests/test_environments.py | 8 +++++--- pydra/engine/tests/test_singularity.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 68685e5160..8a7132db0c 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -144,10 +144,10 @@ def newcache(x): with Submitter(cache_dir=newcache("shelly_sub"), environment=sing) as sub: results = sub(shelly) - assert outputs_dict == attrs_values(results.outputs) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) - assert outputs_dict == attrs_values(outputs) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) @no_win @@ -170,7 +170,9 @@ def newcache(x): assert shelly.cmdline == cmd outputs_dict = sing.execute(shelly_job) - with Submitter(worker=plugin, cache_dir=newcache("shelly_sub")) as sub: + with Submitter( + worker=plugin, environment=sing, cache_dir=newcache("shelly_sub") + ) as sub: results = sub(shelly) outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index b25f4ff131..c46eabd737 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -46,7 +46,8 @@ def test_singularity_2_nosubm(tmp_path): assert singu.cmdline == " ".join(cmd) outputs = singu( - Singularity(image=image), + environment=Singularity(image=image), + cache_dir=tmp_path, ) assert outputs.stdout.strip() == " ".join(cmd[1:]) assert outputs.return_code == 0 @@ -379,7 +380,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): position=1, 
argstr="", help="orig file", - copyfile=True, + copy_mode=File.CopyMode.copy, ), ], outputs=[ From 0c779d28a14a56fbe99277dbd8937bc3b2c28ce6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 16:42:51 +1100 Subject: [PATCH 261/342] fixed overzealous validation of path_template --- pydra/design/shell.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 591f297dfb..ace1eba1a9 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -228,7 +228,7 @@ def _validate_path_template(self, attribute, value): f"path_template ({value!r}) can only be provided when no default " f"({self.default!r}) is provided" ) - if value and not is_fileset_or_union(self.type): + if value and not (is_fileset_or_union(self.type) or self.type is ty.Any): raise ValueError( f"path_template ({value!r}) can only be provided when type is a FileSet, " f"or union thereof, not {self.type!r}" From ab235761e1fd56e3acb8ae9b7c00d7876bb9beb0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sun, 23 Feb 2025 17:34:59 +1100 Subject: [PATCH 262/342] made all args to task call kwds --- pydra/engine/specs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 6b154dc23e..eaa0850ff3 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -178,6 +178,7 @@ class TaskDef(ty.Generic[OutputsType]): def __call__( self, + /, cache_dir: os.PathLike | None = None, worker: "str | ty.Type[Worker] | Worker" = "debug", environment: "Environment | None" = None, From f1a6cb320898ade1f50fdd9f81602efd91bd7d0a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 09:39:17 +1100 Subject: [PATCH 263/342] working on singularity errors --- pydra/engine/submitter.py | 2 +- pydra/engine/tests/test_environments.py | 10 +++------- pydra/engine/tests/test_singularity.py | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 02c51f4216..c36222c467 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -206,7 +206,7 @@ def __call__( @workflow.define(outputs=output_types) def Split(defn: TaskDef, output_types: dict): - node = workflow.add(defn) + node = workflow.add(defn, environment=self.environment, hooks=hooks) return tuple(getattr(node, o) for o in output_types) task_def = Split(defn=task_def, output_types=output_types) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 8a7132db0c..24dd12fd14 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -18,7 +18,7 @@ def makedir(path, name): def drop_stderr(dct: dict[str, ty.Any]): - return {k: v for k, v in dct.items() if k != "stderror"} + return {k: v for k, v in dct.items() if k != "stderr"} def test_native_1(tmp_path): @@ -174,15 +174,11 @@ def newcache(x): worker=plugin, environment=sing, cache_dir=newcache("shelly_sub") ) as sub: results = sub(shelly) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) # singularity gives info about cashed image in stderr - for key in ["stdout", "return_code"]: - assert ( - outputs_dict[key] - == attrs_values(outputs)[key] - == attrs_values(results.outputs)[key] - ) + assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) def shelly_with_input_factory(filename, executable) -> ShellDef: diff --git 
a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index c46eabd737..7ea599af8d 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -386,7 +386,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): outputs=[ shell.outarg( name="out_file", - type=str, + type=File, path_template="{orig_file}", help="output file", ), From d8696f27a5572cecb6a07eeb5d481e326496b834 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 10:12:50 +1100 Subject: [PATCH 264/342] more debugging of singularity --- pydra/design/shell.py | 3 + pydra/engine/submitter.py | 6 +- pydra/engine/tests/test_environments.py | 80 ++++++++++++------------- 3 files changed, 48 insertions(+), 41 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index ace1eba1a9..674b996b94 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -357,6 +357,9 @@ def make( klass = None input_helps, output_helps = {}, {} + if isinstance(wrapped, list): + wrapped = " ".join(wrapped) + executable, inferred_inputs, inferred_outputs = parse_command_line_template( wrapped, inputs=inputs, diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index c36222c467..6240067906 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -211,16 +211,20 @@ def Split(defn: TaskDef, output_types: dict): task_def = Split(defn=task_def, output_types=output_types) + environment = None elif task_def._combiner: raise ValueError( f"Task {self} is marked for combining, but not splitting. " "Use the `split` method to split the task before combining." ) + else: + environment = self.environment + task = Task( task_def, submitter=self, name="main", - environment=self.environment, + environment=environment, hooks=hooks, ) try: diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 24dd12fd14..57be730a4c 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -34,18 +34,18 @@ def newcache(x): shelly_job = Task( definition=shelly, - submitter=Submitter(cache_dir=newcache("shelly-task")), - name="shelly", + submitter=Submitter(cache_dir=newcache("native-task")), + name="native", ) env_outputs = Native().execute(shelly_job) - outputs = shelly(cache_dir=newcache("shelly-exec")) + outputs = shelly(cache_dir=newcache("native-exec")) assert drop_stderr(env_outputs) == drop_stderr(attrs_values(outputs)) - outputs = shelly(environment=Native(), cache_dir=newcache("shelly-call")) + outputs = shelly(environment=Native(), cache_dir=newcache("native-call")) assert env_outputs == attrs_values(outputs) - with Submitter(cache_dir=newcache("shelly-submitter"), environment=Native()) as sub: + with Submitter(cache_dir=newcache("native-submitter"), environment=Native()) as sub: result = sub(shelly) assert drop_stderr(env_outputs) == drop_stderr(attrs_values(result.outputs)) @@ -66,15 +66,15 @@ def newcache(x): shelly_job = Task( definition=shelly, - submitter=Submitter(cache_dir=newcache("shelly")), - name="shelly", + submitter=Submitter(cache_dir=newcache("docker")), + name="docker", ) outputs_dict = docker.execute(shelly_job) - with Submitter(cache_dir=newcache("shelly_sub"), environment=docker) as sub: + with Submitter(cache_dir=newcache("docker_sub"), environment=docker) as sub: result = sub(shelly) - outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) # 
If busybox isn't found locally, then the stderr will have the download progress from # the Docker auto-pull in it for key in ["stdout", "return_code"]: @@ -106,19 +106,19 @@ def newcache(x): shelly = shell.define(cmd)() shelly_job = Task( definition=shelly, - submitter=Submitter(cache_dir=newcache("shelly")), - name="shelly", + submitter=Submitter(cache_dir=newcache("docker")), + name="docker", ) assert shelly.cmdline == cmd outputs_dict = docker.execute(shelly_job) with Submitter( - worker="cf", cache_dir=newcache("shelly_sub"), environment=docker + worker="cf", cache_dir=newcache("docker_sub"), environment=docker ) as sub: result = sub(shelly) assert outputs_dict == attrs_values(result.outputs) - outputs = shelly(cache_dir=newcache("shelly_call"), environment=docker) + outputs = shelly(cache_dir=newcache("docker_call"), environment=docker) assert outputs_dict == attrs_values(outputs) @@ -136,17 +136,17 @@ def newcache(x): shelly = Shelly() shelly_job = Task( definition=shelly, - submitter=Submitter(cache_dir=newcache("shelly")), - name="shelly", + submitter=Submitter(cache_dir=newcache("singu")), + name="singu", ) assert shelly.cmdline == cmd outputs_dict = sing.execute(shelly_job) - with Submitter(cache_dir=newcache("shelly_sub"), environment=sing) as sub: + with Submitter(cache_dir=newcache("singu_sub"), environment=sing) as sub: results = sub(shelly) assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) - outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) + outputs = shelly(environment=sing, cache_dir=newcache("singu_call")) assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) @@ -164,19 +164,19 @@ def newcache(x): shelly = Shelly() shelly_job = Task( definition=shelly, - submitter=Submitter(cache_dir=newcache("shelly")), - name="shelly", + submitter=Submitter(cache_dir=newcache("singu")), + name="singu", ) assert shelly.cmdline == cmd outputs_dict = sing.execute(shelly_job) with Submitter( - worker=plugin, environment=sing, cache_dir=newcache("shelly_sub") + worker=plugin, environment=sing, cache_dir=newcache("singu_sub") ) as sub: results = sub(shelly) assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(results.outputs)) - outputs = shelly(environment=sing, cache_dir=newcache("shelly_call")) + outputs = shelly(environment=sing, cache_dir=newcache("singu_call")) # singularity gives info about cashed image in stderr assert drop_stderr(outputs_dict) == drop_stderr(attrs_values(outputs)) @@ -218,14 +218,14 @@ def newcache(x): f.write("hello ") shelly = shelly_with_input_factory(filename=filename, executable="cat") - shelly_job = make_job(shelly, tmp_path, "shelly") + shelly_job = make_job(shelly, tmp_path, "native") outputs_dict = Native().execute(shelly_job) - with Submitter(environment=Native(), cache_dir=newcache("shelly_sub")) as sub: + with Submitter(environment=Native(), cache_dir=newcache("native_sub")) as sub: results = sub(shelly) assert outputs_dict == attrs_values(results.outputs) - outputs = shelly(environment=Native(), cache_dir=newcache("shelly_call")) + outputs = shelly(environment=Native(), cache_dir=newcache("native_call")) assert outputs_dict == attrs_values(outputs) @@ -247,12 +247,12 @@ def newcache(x): filename = [filename_1, filename_2] shelly = shelly_with_input_factory(filename=None, executable="cat") - with Submitter(environment=Native(), cache_dir=newcache("shelly")) as sub: + with Submitter(environment=Native(), cache_dir=newcache("native")) as sub: results = 
sub(shelly.split(file=filename)) assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] outputs = shelly.split(file=filename)( - environment=Native(), cache_dir=newcache("shelly_call") + environment=Native(), cache_dir=newcache("native_call") ) assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] @@ -273,14 +273,14 @@ def newcache(x): f.write("hello ") shelly = shelly_with_input_factory(filename=filename, executable="cat") - outputs_dict = docker.execute(make_job(shelly, tmp_path, "shelly")) + outputs_dict = docker.execute(make_job(shelly, tmp_path, "docker")) with Submitter(environment=docker, cache_dir=newcache("shell_sub")) as sub: results = sub(shelly) assert outputs_dict == attrs_values(results.outputs) - outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) assert outputs_dict == attrs_values(outputs) @@ -300,18 +300,18 @@ def newcache(x): f.write("hello ") shelly = shelly_with_input_factory(filename=filename, executable="cat") - shelly_job = make_job(shelly, tmp_path, "shelly_job") + shelly_job = make_job(shelly, tmp_path, "docker_job") outputs_dict = docker.execute(shelly_job) with Submitter( - environment=docker, cache_dir=newcache("shelly_sub"), worker=plugin + environment=docker, cache_dir=newcache("docker_sub"), worker=plugin ) as sub: results = sub(shelly) with Submitter(worker=plugin) as sub: results = sub(shelly) assert outputs_dict == attrs_values(results.outputs) - outputs = shelly(environment=docker, cache_dir=newcache("shelly_call")) + outputs = shelly(environment=docker, cache_dir=newcache("docker_call")) assert outputs_dict == attrs_values(outputs) @@ -338,13 +338,13 @@ def newcache(x): shelly = shelly_with_input_factory(filename=None, executable="cat") - with Submitter(environment=docker, cache_dir=newcache("shelly_sub")) as sub: + with Submitter(environment=docker, cache_dir=newcache("docker_sub")) as sub: results = sub(shelly.split(file=filename)) assert [s.strip() for s in results.outputs.stdout] == ["hello", "hi"] outputs = shelly.split(file=filename)( - environment=docker, cache_dir=newcache("shelly_call") + environment=docker, cache_dir=newcache("docker_call") ) assert [s.strip() for s in outputs.stdout] == ["hello", "hi"] @@ -391,16 +391,16 @@ def newcache(x): # execute does not create the cashedir, so this part will fail, # but I guess we don't want to use it this way anyway - # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="shelly") + # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="native") # outputs_dict = Native().execute(shelly) shelly = shelly_outputfile_factory(filename=filename) - with Submitter(environment=Native(), cache_dir=newcache("shelly_sub")) as sub: + with Submitter(environment=Native(), cache_dir=newcache("native_sub")) as sub: result = sub(shelly) assert Path(result.outputs.file_copy) == result.output_dir / "file_copy.txt" - call_cache = newcache("shelly_call") + call_cache = newcache("native_call") outputs = shelly(environment=Native(), cache_dir=call_cache) assert Path(outputs.file_copy) == call_cache / shelly._checksum / "file_copy.txt" @@ -424,7 +424,7 @@ def newcache(x): filename = [filename_1, filename_2] shelly = shelly_outputfile_factory(filename=None) - with Submitter(environment=Native(), cache_dir=newcache("shelly")) as sub: + with Submitter(environment=Native(), cache_dir=newcache("native")) as sub: results = sub(shelly.split(file_orig=filename)) assert 
[f.name for f in results.outputs.file_copy] == [ @@ -432,7 +432,7 @@ def newcache(x): "file_2_copy.txt", ] - call_cache = newcache("shelly_call") + call_cache = newcache("native_call") outputs = shelly.split(file_orig=filename)( environment=Native(), cache_dir=call_cache @@ -457,7 +457,7 @@ def newcache(x): shelly = shelly_outputfile_factory(filename=filename) - with Submitter(environment=docker_env, cache_dir=newcache("shelly")) as sub: + with Submitter(environment=docker_env, cache_dir=newcache("docker")) as sub: results = sub(shelly) assert results.outputs.file_copy == File(results.output_dir / "file_copy.txt") @@ -485,7 +485,7 @@ def newcache(x): shelly = shelly_outputfile_factory(filename=None) - with Submitter(environment=docker_env, cache_dir=newcache("shelly_sub")) as sub: + with Submitter(environment=docker_env, cache_dir=newcache("docker_sub")) as sub: results = sub(shelly.split(file_orig=filename)) assert [f.name for f in results.outputs.file_copy] == [ "file_1_copy.txt", From b30d982905db087c492c98025b452b2d578c6710 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 10:28:01 +1100 Subject: [PATCH 265/342] more debugging --- pydra/engine/tests/test_singularity.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 7ea599af8d..afeb9fb830 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -103,7 +103,7 @@ def test_singularity_st_1(plugin, tmp_path): """commands without arguments in container splitter = executable """ - cmd = ["pwd", "ls"] + cmd = ["whoami", "pwd", "ls"] image = "docker://alpine" Singu = shell.define("dummy") singu = Singu().split("executable", executable=cmd) @@ -111,9 +111,10 @@ def test_singularity_st_1(plugin, tmp_path): outputs = singu( plugin=plugin, environment=Singularity(image=image), cache_dir=tmp_path ) - assert outputs.stdout[0] == "/mnt/pydra" - assert outputs.stdout[1] == "" - assert outputs.return_code == [0, 0] + assert outputs.stdout[0] == "root" + assert outputs.stdout[1] == "/mnt/pydra" + assert outputs.stdout[2] == "" + assert outputs.return_code == [0, 0, 0] @need_singularity @@ -372,7 +373,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): image = "docker://alpine" Singu = shell.define( - cmd, + " ".join(cmd), inputs=[ shell.arg( name="orig_file", From bc5397d4fcff7d1a89981356f1ef8cdf5c55fcf0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 10:46:39 +1100 Subject: [PATCH 266/342] attempting to fix singularity tests --- pydra/engine/submitter.py | 10 +++++++--- pydra/engine/tests/test_singularity.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 6240067906..9b9d647159 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -190,6 +190,8 @@ def __call__( result : Any The result of the task """ + from pydra.engine.environments import Environment + if raise_errors is None: raise_errors = self.worker_name == "debug" if not isinstance(raise_errors, bool): @@ -205,11 +207,13 @@ def __call__( output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} @workflow.define(outputs=output_types) - def Split(defn: TaskDef, output_types: dict): - node = workflow.add(defn, environment=self.environment, hooks=hooks) + def Split(defn: TaskDef, output_types: dict, environment: Environment): + node = workflow.add(defn, 
environment=environment, hooks=hooks) return tuple(getattr(node, o) for o in output_types) - task_def = Split(defn=task_def, output_types=output_types) + task_def = Split( + defn=task_def, output_types=output_types, environment=self.environment + ) environment = None elif task_def._combiner: diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index afeb9fb830..9953b4d661 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -4,7 +4,7 @@ from ..submitter import Submitter from pydra.design import shell, workflow from fileformats.generic import File -from ..environments import Singularity +from pydra.engine.environments import Singularity need_docker = pytest.mark.skipif( @@ -369,11 +369,11 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): with open(file, "w") as f: f.write("hello from pydra\n") - cmd = ["sed", "-is", "s/hello/hi/"] + cmd = "sed -is 's/hello/hi/'" image = "docker://alpine" Singu = shell.define( - " ".join(cmd), + cmd, inputs=[ shell.arg( name="orig_file", From b1184d4731f2b282c68bfc357b1a75d474bcbc07 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 11:10:28 +1100 Subject: [PATCH 267/342] fixed up environments import --- pydra/engine/tests/test_environments.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index 57be730a4c..d18f0215da 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -1,7 +1,7 @@ from pathlib import Path import typing as ty -from ..environments import Native, Docker, Singularity -from ..submitter import Submitter +from pydra.engine.environments import Native, Docker, Singularity +from pydra.engine.submitter import Submitter from fileformats.generic import File from pydra.design import shell from pydra.engine.core import Task From c8bdd3b742616bcb5ff87c7b305275b0527de947 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 11:32:32 +1100 Subject: [PATCH 268/342] shell.define can take a list of tokens as well as a single string --- pydra/design/shell.py | 26 ++++++++++-------- pydra/engine/tests/test_singularity.py | 38 ++++++++++++-------------- 2 files changed, 31 insertions(+), 33 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 674b996b94..bd48c2a8dc 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -4,6 +4,7 @@ import typing as ty import re from collections import defaultdict +import shlex import inspect from copy import copy import attrs @@ -350,16 +351,13 @@ def make( ShellDef, ShellOutputs, klass, arg, out, auto_attribs ) else: - if not isinstance(wrapped, str): + if not isinstance(wrapped, (str, list)): raise ValueError( f"wrapped must be a class or a string, not {wrapped!r}" ) klass = None input_helps, output_helps = {}, {} - if isinstance(wrapped, list): - wrapped = " ".join(wrapped) - executable, inferred_inputs, inferred_outputs = parse_command_line_template( wrapped, inputs=inputs, @@ -447,8 +445,10 @@ def make( # If wrapped is provided (i.e. 
this is not being used as a decorator), return the # interface class if wrapped is not None: - if not isinstance(wrapped, (type, str)): - raise ValueError(f"wrapped must be a class or a string, not {wrapped!r}") + if not isinstance(wrapped, (type, str, list)): + raise ValueError( + f"wrapped must be a class, a string or a list, not {wrapped!r}" + ) return make(wrapped) return make @@ -516,10 +516,13 @@ def parse_command_line_template( else: assert outputs is None outputs = {} - parts = template.split() + if isinstance(template, list): + tokens = template + else: + tokens = shlex.split(template) executable = [] start_args_index = 0 - for part in parts: + for part in tokens: if part.startswith("<") or part.startswith("-"): break executable.append(part) @@ -528,10 +531,9 @@ def parse_command_line_template( raise ValueError(f"Found no executable in command line template: {template}") if len(executable) == 1: executable = executable[0] - args_str = " ".join(parts[start_args_index:]) - if not args_str: + tokens = tokens[start_args_index:] + if not tokens: return executable, inputs, outputs - tokens = re.split(r"\s+", args_str.strip()) arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+\*]+(?:\?|=[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) @@ -673,7 +675,7 @@ def from_type_str(type_str) -> type: option = token else: raise ValueError( - f"Found unknown token '{token}' in command line template: {template}" + f"Found unknown token {token!r} in command line template: {template}" ) remaining_pos = remaining_positions(arguments, len(arguments) + 1, 1) diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 9953b4d661..92fc6c1e9e 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -1,7 +1,8 @@ import shutil import subprocess as sp import pytest -from ..submitter import Submitter +from pydra.engine.submitter import Submitter +from pydra.engine.specs import ShellDef, ShellOutputs from pydra.design import shell, workflow from fileformats.generic import File from pydra.engine.environments import Singularity @@ -369,30 +370,25 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): with open(file, "w") as f: f.write("hello from pydra\n") - cmd = "sed -is 's/hello/hi/'" image = "docker://alpine" - Singu = shell.define( - cmd, - inputs=[ - shell.arg( - name="orig_file", - type=File, - position=1, - argstr="", - help="orig file", - copy_mode=File.CopyMode.copy, - ), - ], - outputs=[ - shell.outarg( - name="out_file", - type=File, + @shell.define + class Singu(ShellDef["Singu.Outputs"]): + + executable = ["sed", "-is", "'s/hello/hi/'"] + + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + copy_mode=File.CopyMode.copy, + ) + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( path_template="{orig_file}", help="output file", - ), - ], - ) + ) singu = Singu(orig_file=str(file)) From 2fc135f9e85a48b06ae6bdb1fa399055705785b3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 14:49:26 +1100 Subject: [PATCH 269/342] debugging docker unittests --- pydra/design/shell.py | 17 +- pydra/engine/core.py | 6 +- pydra/engine/specs.py | 70 +++-- pydra/engine/submitter.py | 4 +- pydra/engine/tests/test_boutiques.py | 4 +- pydra/engine/tests/test_dockertask.py | 365 +++++++++++++------------- pydra/engine/tests/test_shelltask.py | 106 ++++---- pydra/engine/tests/utils.py | 24 +- 8 files changed, 310 insertions(+), 286 deletions(-) diff --git 
a/pydra/design/shell.py b/pydra/design/shell.py index bd48c2a8dc..b87e759445 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -152,13 +152,28 @@ class out(Out): passed) or any input field name (a specific input field will be sent). """ - callable: ty.Callable | None = None + callable: ty.Callable | None = attrs.field(default=None) def __attrs_post_init__(self): # Set type from return annotation of callable if not set if self.type is ty.Any and self.callable: self.type = ty.get_type_hints(self.callable).get("return", ty.Any) + @callable.validator + def _callable_validator(self, _, value): + + if value: + if not callable(value): + raise ValueError(f"callable must be a function, not {value!r}") + elif not getattr(self, "path_template", None) and self.name not in [ + "return_code", + "stdout", + "stderr", + ]: # ShellOutputs.BASE_NAMES + raise ValueError( + "A shell output field must have either a callable or a path_template" + ) + @attrs.define(kw_only=True) class outarg(arg, Out): diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 2c169b3ffc..b8bd5a87c7 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -44,7 +44,7 @@ ) from .helpers_file import copy_nested_files, template_update from pydra.utils.messenger import AuditFlag -from pydra.engine.environments import Environment, Native +from pydra.engine.environments import Environment logger = logging.getLogger("pydra") @@ -134,7 +134,9 @@ def __init__( # We save the submitter is the definition is a workflow otherwise we don't # so the task can be pickled self.submitter = submitter - self.environment = environment if environment is not None else Native() + self.environment = ( + environment if environment is not None else submitter.environment + ) self.name = name self.state_index = state_index diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index eaa0850ff3..cd7f43b4ae 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -946,48 +946,46 @@ def _resolve_value( if not cls._required_fields_satisfied(fld, task.definition): return None - elif isinstance(fld, shell.outarg) and fld.path_template: + if isinstance(fld, shell.outarg) and fld.path_template: return template_update_single( fld, definition=task.definition, output_dir=task.output_dir, spec_type="output", ) - elif fld.callable: - callable_ = fld.callable - if isinstance(fld.callable, staticmethod): - # In case callable is defined as a static method, - # retrieve the function wrapped in the descriptor. - callable_ = fld.callable.__func__ - call_args = inspect.getfullargspec(callable_) - call_args_val = {} - for argnm in call_args.args: - if argnm == "field": - call_args_val[argnm] = fld - elif argnm == "output_dir": - call_args_val[argnm] = task.output_dir - elif argnm == "inputs": - call_args_val[argnm] = task.inputs - elif argnm == "stdout": - call_args_val[argnm] = task.return_values["stdout"] - elif argnm == "stderr": - call_args_val[argnm] = task.return_values["stderr"] - else: - try: - call_args_val[argnm] = task.inputs[argnm] - except KeyError as e: - e.add_note( - f"arguments of the callable function from {fld.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" - ) - raise - return callable_(**call_args_val) - else: - raise Exception( - f"Metadata for '{fld.name}', does not not contain any of the required fields " - f'("callable", "output_file_template" or "value"): {fld}.' 
- ) + assert fld.callable, ( + f"Output field '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld}.' + ) + callable_ = fld.callable + if isinstance(fld.callable, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. + callable_ = fld.callable.__func__ + call_args = inspect.getfullargspec(callable_) + call_args_val = {} + for argnm in call_args.args: + if argnm == "field": + call_args_val[argnm] = fld + elif argnm == "output_dir": + call_args_val[argnm] = task.output_dir + elif argnm == "inputs": + call_args_val[argnm] = task.inputs + elif argnm == "stdout": + call_args_val[argnm] = task.return_values["stdout"] + elif argnm == "stderr": + call_args_val[argnm] = task.return_values["stderr"] + else: + try: + call_args_val[argnm] = task.inputs[argnm] + except KeyError as e: + e.add_note( + f"arguments of the callable function from {fld.name} " + f"has to be in inputs or be field or output_dir, " + f"but {argnm} is used" + ) + raise + return callable_(**call_args_val) ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 9b9d647159..eecdf420e3 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -207,7 +207,9 @@ def __call__( output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} @workflow.define(outputs=output_types) - def Split(defn: TaskDef, output_types: dict, environment: Environment): + def Split( + defn: TaskDef, output_types: dict, environment: Environment | None + ): node = workflow.add(defn, environment=environment, hooks=hooks) return tuple(getattr(node, o) for o in output_types) diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index cc5635a936..79652a6d58 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -3,7 +3,7 @@ import attr import pytest from pydra.engine.helpers import attrs_values -from .utils import result_no_submitter, result_submitter, no_win +from .utils import run_no_submitter, run_submitter, no_win from pydra.design import workflow, boutiques, shell need_bosh_docker = pytest.mark.skipif( @@ -22,7 +22,7 @@ @pytest.mark.parametrize( "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"] ) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir): """simple task to run fsl.bet using BoshTask""" btask = boutiques.define(zenodo_id="1482743") diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 7d54f26805..b1e86e0ba7 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,11 +1,11 @@ import pytest -from pydra.engine.specs import ShellDef from pydra.engine.submitter import Submitter +from pydra.engine.specs import ShellDef, ShellOutputs from fileformats.generic import File from pydra.engine.environments import Docker from pydra.design import shell, workflow from pydra.engine.core import Task -from .utils import no_win, need_docker, result_submitter, result_no_submitter +from .utils import no_win, need_docker, run_submitter, run_no_submitter @no_win @@ -25,11 +25,11 @@ def test_docker_1_nosubm(): assert docky_task.environment.image == "busybox" assert 
docky_task.environment.tag == "latest" assert isinstance(docky_task.environment, Docker) - assert docky_task.cmdline == cmd + assert docky.cmdline == cmd - res = docky_task() - assert res.output.stdout == "root\n" - assert res.output.return_code == 0 + res = docky_task.run() + assert res.outputs.stdout == "root\n" + assert res.outputs.return_code == 0 @no_win @@ -51,8 +51,8 @@ def test_docker_1(plugin): @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_2(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_2(run_function, plugin, tmp_path): """a command with arguments, cmd and args given as executable with and without submitter """ @@ -61,29 +61,28 @@ def test_docker_2(results_function, plugin): docky = Docky() # cmdline doesn't know anything about docker assert docky.cmdline == cmdline - res = results_function(docky, plugin) - assert res.output.stdout.strip() == " ".join(cmdline.split()[1:]) - assert res.output.return_code == 0 + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert outputs.stdout.strip() == " ".join(cmdline.split()[1:]) + assert outputs.return_code == 0 @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_2a(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_2a(run_function, plugin, tmp_path): """a command with arguments, using executable and args using submitter """ - cmd_exec = "echo" - cmd_args = ["hail", "pydra"] + cmd = ["echo", "hail", "pydra"] # separate command into exec + args - Docky = shell.define(" ".join([cmd_exec] + cmd_args)) + Docky = shell.define(cmd) docky = Docky() - assert docky.executable == "echo" - assert docky.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" + assert docky.executable == cmd + assert docky.cmdline == " ".join(cmd) - res = results_function(docky, plugin) - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 # tests with State @@ -91,21 +90,19 @@ def test_docker_2a(results_function, plugin): @no_win @need_docker -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_docker_st_1(results_function, plugin): +@pytest.mark.parametrize("run_function", [run_no_submitter, run_submitter]) +def test_docker_st_1(run_function, plugin, tmp_path): """commands without arguments in container splitter = executable """ cmd = ["pwd", "whoami"] - Docky = shell.define("placeholder") + Docky = shell.define("docky") # cmd is just a placeholder docky = Docky().split(executable=cmd) - assert docky.state.splitter == "docky.executable" - - res = results_function(docky, plugin) - assert res[0].output.stdout == f"/mnt/pydra{docky.output_dir[0]}\n" - assert res[1].output.stdout == "root\n" - assert res[0].output.return_code == res[1].output.return_code == 0 + outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) + assert outputs.stdout[0] == f"/mnt/pydra{docky.output_dir[0]}\n" + assert outputs.stdout[1] == "root\n" + assert outputs.return_code[0] == outputs.return_code[1] == 0 # tests with customized output_spec @@ -119,12 +116,11 @@ def 
test_docker_outputspec_1(plugin, tmp_path): output_path is automatically added to the bindings """ outputs = [shell.out(name="newfile", type=File, help="new file")] - docky = shell.define("touch newfile_tmp.txt", outputs=outputs)( - environment=Docker(image="ubuntu") - ) + Docky = shell.define("touch newfile_tmp.txt", outputs=outputs) + docky = Docky() - res = docky(plugin=plugin) - assert res.output.stdout == "" + outputs = docky(plugin=plugin, environment=Docker(image="ubuntu")) + assert outputs.stdout == "" # tests with customised input_spec @@ -156,8 +152,8 @@ def test_docker_inputspec_1(tmp_path): strip=True, ) - res = docky() - assert res.output.stdout == "hello from pydra" + outputs = docky() + assert outputs.stdout.strip() == "hello from pydra" @no_win @@ -188,8 +184,8 @@ def test_docker_inputspec_1a(tmp_path): strip=True, ) - res = docky() - assert res.output.stdout == "hello from pydra" + outputs = docky() + assert outputs.stdout.strip() == "hello from pydra" @no_win @@ -206,32 +202,35 @@ def test_docker_inputspec_2(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file1", - type=File, - position=1, - argstr="", - help="input file 1", - ), - shell.arg( - name="file2", - type=File, - default=filename_2, - position=2, - argstr="", - help="input file 2", - ), - ] - docky = shell.define(cmd, inputs=inputs)( - name="docky", - environment=Docker(image="busybox"), + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + position=1, + argstr="", + help="input file 1", + ), + shell.arg( + name="file2", + type=File, + default=filename_2, + position=2, + argstr="", + help="input file 2", + ), + ], + ) + docky = Docky( file1=filename_1, - strip=True, ) - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = docky( + name="docky", + environment=Docker(image="busybox"), + ) + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @@ -249,33 +248,34 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file1", - type=File, - default=filename_1, - position=1, - argstr="", - help="input file 1", - ), - shell.arg( - name="file2", - type=File, - position=2, - argstr="", - help="input file 2", - ), - ] + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + argstr="", + help="input file 1", + ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", + ), + ], + ) - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), + docky = Docky( file2=filename_2, - strip=True, ) - assert docky.definition.file2.fspath == filename_2 + assert docky.file2.fspath == filename_2 - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + outputs = docky(environment=Docker(image="busybox")) + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @@ -294,32 +294,31 @@ def test_docker_inputspec_2a(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file1", - type=File, - default=filename_1, - position=1, - argstr="", - help="input file 1", - ), - shell.arg( - name="file2", - type=File, - position=2, - argstr="", - help="input file 2", - ), - ] - - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - file2=filename_2, - strip=True, + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file1", + type=File, + default=filename_1, + position=1, + 
argstr="", + help="input file 1", + ), + shell.arg( + name="file2", + type=File, + position=2, + argstr="", + help="input file 2", + ), + ], ) - res = docky() - assert res.output.stdout == "hello from pydra\nhave a nice one" + docky = Docky(file2=filename_2) + + outputs = docky(environment=Docker(image="busybox")) + assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @no_win @@ -350,8 +349,8 @@ def test_docker_inputspec_3(plugin, tmp_path): ) cmdline = docky.cmdline - res = docky() - assert "docker" in res.output.stdout + outputs = docky() + assert "docker" in outputs.stdout assert cmdline == docky.cmdline @@ -366,36 +365,30 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): with open(file, "w") as f: f.write("hello from pydra\n") - cmd = ["sed", "-is", "s/hello/hi/"] - - inputs = [ - shell.arg( - name="orig_file", - type=File, + @shell.define + class Docky(ShellDef["Docky.Outputs"]): + executable = ["sed", "-is", "s/hello/hi/"] + orig_file: File = shell.arg( position=1, argstr="", help="orig file", - copyfile="copy", - ), - shell.arg( - name="out_file", - type=str, - output_file_template="{orig_file}", - help="output file", - ), - ] + copy_mode="copy", + ) - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - orig_file=str(file), - ) + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + path_template="{orig_file}.txt", + help="output file", + ) + + docky = Docky(orig_file=str(file)) - res = docky() - assert res.output.stdout == "" - out_file = res.output.out_file.fspath + outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) + assert outputs.stdout == "" + out_file = outputs.out_file.fspath assert out_file.exists() # the file is copied, and then it is changed in place - assert out_file.parent == docky.output_dir + assert out_file.parent.parent == tmp_path with open(out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged @@ -418,24 +411,24 @@ def test_docker_inputspec_state_1(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] - - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - strip=True, + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], ) - res = docky(split={"file": [str(filename_1), str(filename_2)]}) - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + docky = Docky().split(file=[str(filename_1), str(filename_2)]) + + outputs = docky(environment=Docker(image="busybox")) + assert outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @no_win @@ -454,23 +447,23 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - strip=True, + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], ) + docky = Docky().split(file=[str(file_1), str(file_2)]) - res = docky(split={"file": [str(file_1), str(file_2)]}) - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + outputs = docky(environment=Docker(image="busybox")) + assert 
outputs.stdout[0].strip() == "hello from pydra" + assert outputs.stdout[1].strip() == "have a nice one" @no_win @@ -508,8 +501,8 @@ def Workflow(cmd, file): wf = Workflow(cmd=cmd, file=filename) - res = wf.result() - assert res.output.out == "hello from pydra" + outputs = wf() + assert outputs.ou.strip() == "hello from pydra" @no_win @@ -525,35 +518,35 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], + ) @workflow.define def Workflow(cmd, file): docky = workflow.add( - shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - file=file, - strip=True, - ) + Docky(file=file), + environment=Docker(image="busybox"), ) return docky.stdout wf = Workflow(cmd=cmd) - res = wf(split={"file": [file_1, file_2]}) + outputs = wf(split={"file": [file_1, file_2]}) - assert res[0].output.out == "hello from pydra" - assert res[1].output.out == "have a nice one" + assert outputs.out[0].strip() == "hello from pydra" + assert outputs.out[1].strip() == "have a nice one" @no_win @@ -594,5 +587,5 @@ def Workflow(cmd, file): wf = Workflow(cmd=cmd) - res = wf(split={"file": [str(file_1), str(file_2)]}) - assert res.output.out == ["hello from pydra", "have a nice one"] + outputs = wf(split={"file": [str(file_1), str(file_2)]}) + assert outputs.out == ["hello from pydra", "have a nice one"] diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index d32d5d32ff..17dec45a0f 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -21,14 +21,14 @@ MultiOutputFile, MultiInputObj, ) -from .utils import result_no_submitter, result_submitter, no_win +from .utils import run_no_submitter, run_submitter, no_win if sys.platform.startswith("win"): pytest.skip("SLURM not available in windows", allow_module_level=True) @pytest.mark.flaky(reruns=2) # when dask -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] @@ -41,7 +41,7 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): assert res.output.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_1_strip(plugin, results_function, tmp_path): """simple command, no arguments strip option to remove \n at the end os stdout @@ -57,7 +57,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): assert res.output.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] @@ -71,7 +71,7 @@ def test_shell_cmd_2(plugin, results_function, tmp_path): assert res.output.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def 
test_shell_cmd_2a(plugin, results_function, tmp_path): """a command with arguments, using executable and args""" cmd_exec = "echo" @@ -88,7 +88,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): assert res.output.stderr == "" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2b(plugin, results_function, tmp_path): """a command with arguments, using strings executable and args""" cmd_exec = "echo" @@ -276,7 +276,7 @@ def test_wf_shell_cmd_1(plugin, tmp_path): # customised input definition -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): """a command with executable, args and one command opt, using a customized input_spec to add the opt to the command @@ -316,7 +316,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): assert res.output.stdout == "hello from pydra" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): """a command with executable, args and two command options, using a customized input_spec to add the opt to the command @@ -364,7 +364,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): assert res.output.stdout == "HELLO from pydra" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" @@ -402,7 +402,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): assert res.output.stdout == "HELLO\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided using shorter syntax for input definition (no attr.ib) @@ -435,7 +435,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): assert res.output.stdout == "HELLO\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided after init""" cmd_exec = "echo" @@ -502,7 +502,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): assert "mandatory" in str(excinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): """mandatory=False, so tasks runs fine even without the value""" cmd_exec = "echo" @@ -537,7 +537,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): assert res.output.stdout == "\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", 
[run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" @@ -568,7 +568,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): assert res.output.stdout == "Hello\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided using shorter syntax for input definition (no attr.ib) @@ -592,7 +592,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): assert res.output.stdout == "Hello\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" @@ -683,7 +683,7 @@ def test_shell_cmd_inputspec_4d_exception(plugin): ShellDef(name="shelly", executable=cmd_exec, input_spec=my_input_spec) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): """checking xor in metadata: task should work fine, since only one option is True""" cmd_exec = "ls" @@ -781,7 +781,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): assert "is mutually exclusive" in str(excinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): """checking requires in metadata: the required field is set in the init, so the task works fine @@ -869,7 +869,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): assert "requires" in str(excinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): """checking requires in metadata: the required field set after the init @@ -918,7 +918,7 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): results_function(shelly, plugin) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): """ providing output name using input_spec, @@ -961,7 +961,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): assert out1.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): """ providing output name using input_spec, @@ -1004,7 +1004,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): assert res.output.out1_changed.fspath.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", 
[run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1049,7 +1049,7 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): assert res.output.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): """ providing output name using input_spec, @@ -1090,7 +1090,7 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): assert res.output.out1.fspath.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1147,7 +1147,7 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): assert res.output.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, @@ -1204,7 +1204,7 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): assert res.output.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata), @@ -1257,7 +1257,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): assert shelly.output_dir == res.output.file_copy.fspath.parent -@pytest.mark.parametrize("results_function", [result_no_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter]) def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata), @@ -1306,7 +1306,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): assert shelly.output_dir == res.output.file_copy.fspath.parent -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata) @@ -1356,7 +1356,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): assert res.output.file_copy.fspath.name == "file_copy" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata) @@ -1408,7 +1408,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): assert res.output.file_copy.fspath.parent == shelly.output_dir -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, 
run_submitter]) def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): """ providing output name explicitly by manually setting value in input_spec @@ -1462,7 +1462,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): assert shelly.output_dir == res.output.file_copy.fspath.parent -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): """using input_spec, providing list of files as an input""" @@ -1605,7 +1605,7 @@ def test_shell_cmd_inputspec_11(tmp_path): assert out_file.fspath.name == "test1" or out_file.fspath.name == "test2" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_12(tmp_path: Path, plugin, results_function): """ providing output name using input_spec @@ -1708,7 +1708,7 @@ def test_shell_cmd_inputspec_with_iterable(): assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): """shelltask changes a file in place, adding copyfile=True to the file-input from input_spec @@ -1770,7 +1770,7 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): assert "hello from pydra\n" == f.read() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): """shelltask changes a file in place, adding copyfile=False to the File-input from input_spec @@ -1850,7 +1850,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): "if we allow for this orig_file is changing, so does checksum," " and the results can't be found" ) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): """shelltask changes a file in place, copyfile is None for the file-input, so original filed is changed @@ -1907,7 +1907,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): assert "hi from pydra\n" == f.read() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): """adding state to the input from input_spec""" cmd_exec = "echo" @@ -1986,7 +1986,7 @@ def test_shell_cmd_inputspec_typeval_2(): ShellDef(executable=cmd_exec, text="hello", input_spec=my_input_spec) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): """adding state to the input from input_spec using shorter syntax for input_spec (without default) @@ -2018,7 +2018,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): 
assert res[1].output.stdout == "hi\n" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): """ adding splitter to input that is used in the output_file_tamplate @@ -2057,7 +2057,7 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): assert res[i].output.out1.fspath.parent == shelly.output_dir[i] -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" @@ -2104,7 +2104,7 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): assert res[1].output.stdout == "have a nice one" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" @@ -2654,7 +2654,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): # customised output definition -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname @@ -2674,7 +2674,7 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): assert res.output.newfile.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname @@ -2714,7 +2714,7 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): assert "does not exist" in str(exinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_2(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2756,7 +2756,7 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): assert "no file matches" in str(excinfo.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2779,7 +2779,7 @@ def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): assert all([file.fspath.exists() for file in res.output.newfile]) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2818,7 +2818,7 @@ def gather_output(field, output_dir): ) -@pytest.mark.parametrize("results_function", 
[result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, @@ -2874,7 +2874,7 @@ def gather_output(executable, output_dir, ble): shelly() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): """ Customised output definition defined as a class, @@ -2904,7 +2904,7 @@ def gather_output(executable, output_dir): assert all([file.exists() for file in res.output.newfile]) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): """ providing output name by providing output_file_template @@ -2972,7 +2972,7 @@ def test_shell_cmd_outputspec_6a(): assert res.output.out1.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. @@ -3048,7 +3048,7 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): assert file.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. 
@@ -3126,7 +3126,7 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): assert res.output.new_files.fspath.exists() -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8a(tmp_path, plugin, results_function): """ customised output_spec, adding int and str to the output, @@ -3214,7 +3214,7 @@ def test_shell_cmd_outputspec_8b_error(): assert "has to have a callable" in str(e.value) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8c(tmp_path, plugin, results_function): """ customised output_spec, adding Directory to the output named by args @@ -3257,7 +3257,7 @@ def get_lowest_directory(directory_path): assert get_lowest_directory(arg_dir) == f"/dir{index+1}" -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_outputspec_8d(tmp_path, plugin, results_function): """ customised output_spec, adding Directory to the output named by input definition @@ -3325,7 +3325,7 @@ def get_lowest_directory(directory_path): ) -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): """ providing output name by providing output_file_template diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index ff6a273bbf..9fc1d5f91f 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -13,6 +13,9 @@ from ..submitter import Submitter from pydra.design import workflow, python +if ty.TYPE_CHECKING: + from pydra.engine.environments import Environment + need_docker = pytest.mark.skipif( shutil.which("docker") is None or sp.call(["docker", "info"]), @@ -35,17 +38,28 @@ ) -def result_no_submitter(shell_def: ShellDef, plugin: str = None): +def run_no_submitter( + shell_def: ShellDef, + cache_dir: Path | None = None, + plugin: str | None = None, + environment: "Environment | None" = None, +): """helper function to return result when running without submitter""" - return shell_def(worker=plugin) + return shell_def(worker=plugin, cache_dir=cache_dir, environment=environment) -def result_submitter(shell_def: ShellDef, plugin: str): +def run_submitter( + shell_def: ShellDef, + cache_dir: Path | None = None, + plugin: str | None = None, + environment: "Environment | None" = None, +): """helper function to return result when running with submitter with specific plugin """ - with Submitter(worker=plugin) as sub: - return sub(shell_def) + with Submitter(worker=plugin, cache_dir=cache_dir, environment=environment) as sub: + results = sub(shell_def) + return results.outputs dot_check = sp.run(["which", "dot"], stdout=sp.PIPE, stderr=sp.PIPE) From a42cc510479c19667e48f5bfe5389328232885e3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 16:06:04 +1100 Subject: [PATCH 270/342] test_dockertask tests pass --- new-docs/source/tutorial/5-shell.ipynb | 67 +++++++++++- pydra/design/shell.py | 11 +- pydra/engine/environments.py | 11 +- pydra/engine/tests/test_dockertask.py | 142 +++++++++++++------------ 4 files changed, 149 insertions(+), 82 deletions(-) diff --git 
a/new-docs/source/tutorial/5-shell.ipynb b/new-docs/source/tutorial/5-shell.ipynb index 806c1c6f9e..5ceb8ced72 100644 --- a/new-docs/source/tutorial/5-shell.ipynb +++ b/new-docs/source/tutorial/5-shell.ipynb @@ -109,11 +109,6 @@ "print(trim_png.cmdline)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -180,6 +175,68 @@ "print(f\"'--int-arg' default: {fields_dict(Cp)['int_arg'].default}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Path templates for output files\n", + "\n", + "By default, when an output file argument is defined, a `path_template` attribute will\n", + "be assigned to the field based on its name and extension (if applicable). For example,\n", + "the `zipped` output field in the following Gzip command will be assigned a\n", + "`path_template` of `out_file.gz`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pydra.design import shell\n", + "from fileformats.generic import File\n", + "\n", + "Gzip = shell.define(\"gzip \")\n", + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"))\n", + "print(gzip.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, if this needs to be specified it can be by using the `$` operator, e.g." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Gzip = shell.define(\"gzip \")\n", + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"))\n", + "print(gzip.cmdline)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To give the field a path_template of `archive.gz` when it is written on the command line.\n", + "Note that this value can always be overridden when the task is initialised, e.g." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gzip = Gzip(in_files=File.mock(\"/a/file.txt\"), out_file=\"/path/to/archive.gz\")\n", + "print(gzip.cmdline)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index b87e759445..6722f44e52 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -549,7 +549,7 @@ def parse_command_line_template( tokens = tokens[start_args_index:] if not tokens: return executable, inputs, outputs - arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+\*]+(?:\?|=[^>]+)?)>" + arg_pattern = r"<([:a-zA-Z0-9_,\|\-\.\/\+\*]+(?:\?|(?:=|\$)[^>]+)?)>" opt_pattern = r"--?[a-zA-Z0-9_]+" arg_re = re.compile(arg_pattern) opt_re = re.compile(opt_pattern) @@ -644,6 +644,13 @@ def from_type_str(type_str) -> type: elif "=" in name: name, default = name.split("=") kwds["default"] = eval(default) + elif "$" in name: + name, path_template = name.split("$") + kwds["path_template"] = path_template + if field_type is not outarg: + raise ValueError( + f"Path templates can only be used with output fields, not {token}" + ) if ":" in name: name, type_str = name.split(":") type_ = from_type_str(type_str) @@ -661,7 +668,7 @@ def from_type_str(type_str) -> type: # Add field to outputs with the same name as the input add_arg(name, out, {"type": type_, "callable": _InputPassThrough(name)}) # If name contains a '.', treat it as a file template and strip it from the name - if field_type is outarg: + if field_type is outarg and "path_template" not in kwds: path_template = name if is_fileset_or_union(type_): if ty.get_origin(type_): diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index e28bef0596..c32c35f2df 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -92,7 +92,7 @@ def bind(self, loc, mode="ro"): loc_abs = Path(loc).absolute() return f"{loc_abs}:{self.root}{loc_abs}:{mode}" - def _get_bindings( + def get_bindings( self, task: "Task", root: str | None = None ) -> tuple[dict[str, tuple[str, str]], dict[str, tuple[Path, ...]]]: """Return bindings necessary to run task in an alternative root. 
@@ -156,13 +156,14 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self._get_bindings(task=task, root=self.root) + mounts, input_updates = self.get_bindings(task=task, root=self.root) + + # add the cache directory to the list of mounts + mounts[task.cache_dir] = (f"{self.root}{task.cache_dir}", "rw") docker_args = [ "docker", "run", - "-v", - self.bind(task.cache_dir, "rw"), *self.xargs, ] docker_args.extend( @@ -195,7 +196,7 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self._get_bindings(task=task, root=self.root) + mounts, input_updates = self.get_bindings(task=task, root=self.root) # todo adding xargsy etc singularity_args = [ diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index b1e86e0ba7..ec39781ed0 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,3 +1,4 @@ +import attrs import pytest from pydra.engine.submitter import Submitter from pydra.engine.specs import ShellDef, ShellOutputs @@ -100,7 +101,10 @@ def test_docker_st_1(run_function, plugin, tmp_path): docky = Docky().split(executable=cmd) outputs = run_function(docky, tmp_path, plugin, environment=Docker(image="busybox")) - assert outputs.stdout[0] == f"/mnt/pydra{docky.output_dir[0]}\n" + assert ( + outputs.stdout[0] + == f"/mnt/pydra{tmp_path}/{attrs.evolve(docky, executable=cmd[0])._checksum}\n" + ) assert outputs.stdout[1] == "root\n" assert outputs.return_code[0] == outputs.return_code[1] == 0 @@ -115,8 +119,7 @@ def test_docker_outputspec_1(plugin, tmp_path): customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings """ - outputs = [shell.out(name="newfile", type=File, help="new file")] - Docky = shell.define("touch newfile_tmp.txt", outputs=outputs) + Docky = shell.define("touch ") docky = Docky() outputs = docky(plugin=plugin, environment=Docker(image="ubuntu")) @@ -136,23 +139,22 @@ def test_docker_inputspec_1(tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] - - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - file=filename, - strip=True, + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], ) - outputs = docky() + docky = Docky(file=filename) + + outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) assert outputs.stdout.strip() == "hello from pydra" @@ -168,23 +170,23 @@ def test_docker_inputspec_1a(tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - default=filename, - position=1, - argstr="", - help="input file", - ) - ] - - docky = shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - strip=True, + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + default=filename, + position=1, + argstr="", + help="input file", + ) + ], ) - outputs = docky() + docky = Docky() + + outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) assert outputs.stdout.strip() == "hello from pydra" @@ -476,33 +478,33 @@ def test_docker_wf_inputspec_1(plugin, tmp_path): cmd = 
"cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], + ) @workflow.define - def Workflow(cmd, file): + def Workflow(file): docky = workflow.add( - shell.define(cmd, inputs=inputs)( - file=file, - environment=Docker(image="busybox"), - strip=True, - ) + Docky(file=file), + environment=Docker(image="busybox"), ) return docky.stdout - wf = Workflow(cmd=cmd, file=filename) + wf = Workflow(file=filename) outputs = wf() - assert outputs.ou.strip() == "hello from pydra" + assert outputs.out.strip() == "hello from pydra" @no_win @@ -532,7 +534,7 @@ def test_docker_wf_state_inputspec_1(plugin, tmp_path): ) @workflow.define - def Workflow(cmd, file): + def Workflow(file): docky = workflow.add( Docky(file=file), @@ -541,9 +543,9 @@ def Workflow(cmd, file): return docky.stdout - wf = Workflow(cmd=cmd) + wf = Workflow().split(file=[file_1, file_2]) - outputs = wf(split={"file": [file_1, file_2]}) + outputs = wf() assert outputs.out[0].strip() == "hello from pydra" assert outputs.out[1].strip() == "have a nice one" @@ -562,30 +564,30 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): cmd = "cat" - inputs = [ - shell.arg( - name="file", - type=File, - position=1, - argstr="", - help="input file", - ) - ] + Docky = shell.define( + cmd, + inputs=[ + shell.arg( + name="file", + type=File, + position=1, + argstr="", + help="input file", + ) + ], + ) @workflow.define - def Workflow(cmd, file): + def Workflow(file): docky = workflow.add( - shell.define(cmd, inputs=inputs)( - environment=Docker(image="busybox"), - file=file, - strip=True, - ) + Docky(file=file), + environment=Docker(image="busybox"), ) return docky.stdout - wf = Workflow(cmd=cmd) + wf = Workflow().split(file=[str(file_1), str(file_2)]) - outputs = wf(split={"file": [str(file_1), str(file_2)]}) + outputs = wf() assert outputs.out == ["hello from pydra", "have a nice one"] From fdd5e52af1f4e2c028f34db6757cc75e12b4bb22 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 24 Feb 2025 16:21:24 +1100 Subject: [PATCH 271/342] fixed initialisation of environment in Submitter --- pydra/engine/submitter.py | 4 +++- pydra/engine/tests/test_singularity.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index eecdf420e3..c9433e2d21 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -99,6 +99,8 @@ def __init__( **kwargs, ): + from pydra.engine.environments import Native + if worker is None: worker = "debug" @@ -120,7 +122,7 @@ def __init__( self.cache_dir = cache_dir self.cache_locations = cache_locations - self.environment = environment + self.environment = environment if environment is not None else Native() self.rerun = rerun self.loop = get_open_loop() self._own_loop = not self.loop.is_running() diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 92fc6c1e9e..97f3f109a4 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -375,7 +375,7 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): @shell.define class Singu(ShellDef["Singu.Outputs"]): - executable = ["sed", "-is", "'s/hello/hi/'"] + executable = ["sed", "-is", "s/hello/hi/"] orig_file: File = shell.arg( position=1, From 44d0d67d7a6c5a7ebf20a9783fcf27fb45025491 Mon Sep 17 00:00:00 2001 From: 
Tom Close Date: Mon, 24 Feb 2025 18:14:39 +1100 Subject: [PATCH 272/342] debugged test_dockertask --- pydra/design/shell.py | 11 ++++++----- pydra/design/tests/test_shell.py | 22 +++++++--------------- pydra/engine/core.py | 8 +++++++- pydra/engine/tests/test_dockertask.py | 4 ++-- pydra/engine/tests/test_singularity.py | 2 +- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 6722f44e52..7ea40f78b8 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -4,7 +4,6 @@ import typing as ty import re from collections import defaultdict -import shlex import inspect from copy import copy import attrs @@ -28,6 +27,7 @@ MultiInputObj, is_optional, optional_type, + non_optional_type, ) if ty.TYPE_CHECKING: @@ -117,8 +117,7 @@ def _validate_sep(self, _, sep): tp = ty.get_args(self.type)[0] else: tp = self.type - if is_optional(tp): - tp = optional_type(tp) + tp = non_optional_type(tp) origin = ty.get_origin(tp) or tp if ( @@ -534,7 +533,7 @@ def parse_command_line_template( if isinstance(template, list): tokens = template else: - tokens = shlex.split(template) + tokens = template.split() executable = [] start_args_index = 0 for part in tokens: @@ -643,7 +642,9 @@ def from_type_str(type_str) -> type: kwds["default"] = attrs.Factory(list) elif "=" in name: name, default = name.split("=") - kwds["default"] = eval(default) + kwds["default"] = ( + default[1:-1] if re.match(r"('|\").*\1", default) else eval(default) + ) elif "$" in name: name, path_template = name.split("$") kwds["path_template"] = path_template diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index f4fcd64238..65d8d890a2 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -211,6 +211,7 @@ def test_interface_template_more_complex(): name="tuple_arg", argstr="--tuple-arg", type=tuple[int, str] | None, + sep=" ", default=None, sep=" ", position=6, @@ -286,11 +287,7 @@ def test_interface_template_with_overrides_and_optionals(): position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg( - name="in_fs_objects", - type=MultiInputObj[FsObject], - position=1, - ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), shell.arg( name="recursive", argstr="-R", @@ -313,6 +310,7 @@ def test_interface_template_with_overrides_and_optionals(): type=tuple[int, str], sep=" ", position=5, + sep=" ", ), ] + outargs + [ShellDef.additional_args] assert sorted_fields(Cp.Outputs) == outargs + [ @@ -362,11 +360,7 @@ def test_interface_template_with_defaults(): position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg( - name="in_fs_objects", - type=MultiInputObj[FsObject], - position=1, - ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), output, shell.arg(name="recursive", argstr="-R", type=bool, default=True, position=3), shell.arg( @@ -379,6 +373,7 @@ def test_interface_template_with_defaults(): type=tuple[int, str], default=(1, "bar"), position=6, + sep=" ", ), ShellDef.additional_args, ] @@ -433,11 +428,7 @@ def test_interface_template_with_type_overrides(): position=0, help=shell.EXECUTABLE_HELP_STRING, ), - shell.arg( - name="in_fs_objects", - type=MultiInputObj[FsObject], - position=1, - ), + shell.arg(name="in_fs_objects", type=MultiInputObj[FsObject], position=1), output, shell.arg(name="recursive", argstr="-R", type=bool, default=False, position=3), shell.arg(name="text_arg", argstr="--text-arg", type=str, position=4), @@ -453,6 +444,7 @@ def 
test_interface_template_with_type_overrides(): argstr="--tuple-arg", type=tuple[int, str], position=6, + sep=" ", ), ShellDef.additional_args, ] diff --git a/pydra/engine/core.py b/pydra/engine/core.py index b8bd5a87c7..9b7af85008 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -773,11 +773,17 @@ def add( OutputType The outputs definition of the node """ + from pydra.engine.environments import Native + if name is None: name = type(task_def).__name__ if name in self._nodes: raise ValueError(f"Node with name {name!r} already exists in the workflow") - if environment and task_def._task_type != "shell": + if ( + environment + and not isinstance(environment, Native) + and task_def._task_type != "shell" + ): raise ValueError( "Environments can only be used with 'shell' tasks not " f"{task_def._task_type!r} tasks ({task_def!r})" diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index ec39781ed0..4c98cb2a37 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -545,7 +545,7 @@ def Workflow(file): wf = Workflow().split(file=[file_1, file_2]) - outputs = wf() + outputs = wf(cache_dir=tmp_path) assert outputs.out[0].strip() == "hello from pydra" assert outputs.out[1].strip() == "have a nice one" @@ -589,5 +589,5 @@ def Workflow(file): wf = Workflow().split(file=[str(file_1), str(file_2)]) - outputs = wf() + outputs = wf(cache_dir=tmp_path) assert outputs.out == ["hello from pydra", "have a nice one"] diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 97f3f109a4..e2487a7660 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -390,7 +390,7 @@ class Outputs(ShellOutputs): help="output file", ) - singu = Singu(orig_file=str(file)) + singu = Singu(orig_file=file) outputs = singu(environment=Singularity(image=image), cache_dir=tmp_path) assert outputs.stdout == "" From 28f1bc5bbabec093c59fb812a52ac12f9b7fb7cc Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 11:29:46 +1100 Subject: [PATCH 273/342] fixed merge error --- pydra/design/shell.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 7ea40f78b8..a1a39e4972 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -27,7 +27,6 @@ MultiInputObj, is_optional, optional_type, - non_optional_type, ) if ty.TYPE_CHECKING: @@ -117,7 +116,8 @@ def _validate_sep(self, _, sep): tp = ty.get_args(self.type)[0] else: tp = self.type - tp = non_optional_type(tp) + if is_optional(tp): + tp = optional_type(tp) origin = ty.get_origin(tp) or tp if ( From 3f397e0c619fd04d538e601950b293d0d6d0e551 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 11:43:49 +1100 Subject: [PATCH 274/342] fixed merging error in test_shell --- pydra/design/tests/test_shell.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 65d8d890a2..b8804134d5 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -213,7 +213,6 @@ def test_interface_template_more_complex(): type=tuple[int, str] | None, sep=" ", default=None, - sep=" ", position=6, ), ShellDef.additional_args, From 408029861a00db1654db042bb589fd07d1d86876 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 15:18:42 +1100 Subject: [PATCH 275/342] debugging unittests --- pydra/design/tests/test_shell.py | 1 - 
pydra/engine/helpers_file.py | 18 ++-- pydra/engine/tests/test_helpers_file.py | 2 +- pydra/engine/tests/test_nipype1_convert.py | 100 --------------------- pydra/engine/tests/test_node_task.py | 2 +- pydra/engine/tests/test_numpy_examples.py | 58 ++++++------ pydra/engine/tests/test_profiles.py | 59 +++++------- pydra/engine/tests/test_specs.py | 77 +++++++++------- 8 files changed, 106 insertions(+), 211 deletions(-) delete mode 100644 pydra/engine/tests/test_nipype1_convert.py diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index b8804134d5..184c8e05e2 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -309,7 +309,6 @@ def test_interface_template_with_overrides_and_optionals(): type=tuple[int, str], sep=" ", position=5, - sep=" ", ), ] + outargs + [ShellDef.additional_args] assert sorted_fields(Cp.Outputs) == outargs + [ diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 5e55518e53..807778e757 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -12,6 +12,9 @@ from fileformats.generic import FileSet from pydra.engine.helpers import is_lazy, attrs_values, list_fields +if ty.TYPE_CHECKING: + from pydra.engine.specs import TaskDef + from pydra.design import shell logger = logging.getLogger("pydra") @@ -235,18 +238,23 @@ def _template_formatting(field, definition, input_values): # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): formatted = [ - _string_template_formatting(field, t, definition, input_values) + _single_template_formatting(field, t, definition, input_values) for t in template ] else: assert isinstance(template, str) - formatted = _string_template_formatting( + formatted = _single_template_formatting( field, template, definition, input_values ) - return Path(formatted) + return formatted -def _string_template_formatting(field, template, definition, input_values): +def _single_template_formatting( + field: "shell.outarg", + template: str, + definition: "TaskDef", + input_values: dict[str, ty.Any], +) -> Path | None: from pydra.utils.typing import MultiInputObj, MultiOutputFile inp_fields = re.findall(r"{\w+}", template) @@ -328,7 +336,7 @@ def _string_template_formatting(field, template, definition, input_values): formatted_value = _element_formatting( template, val_dict, file_template, keep_extension=field.keep_extension ) - return formatted_value + return Path(formatted_value) if formatted_value is not None else formatted_value def _element_formatting( diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 65417b9efe..3b15e5bfd2 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -402,4 +402,4 @@ def test_template_formatting(tmp_path: Path): input_values=inputs_dict, output_dir=tmp_path, spec_type="input", - ) == [str(tmp_path / "file.bvec"), str(tmp_path / "file.bval")] + ) == [tmp_path / "file.bvec", tmp_path / "file.bval"] diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py deleted file mode 100644 index 07af76e501..0000000000 --- a/pydra/engine/tests/test_nipype1_convert.py +++ /dev/null @@ -1,100 +0,0 @@ -import typing as ty -import pytest -from pathlib import Path -from pydra.engine.specs import ShellOutputs, ShellDef -from fileformats.generic import File -from pydra.design import shell - - -def find_txt(output_dir: Path) -> File: - files = 
list(output_dir.glob("*.txt")) - assert len(files) == 1 - return files[0] - - -interf_inputs = [shell.arg(name="test", type=ty.Any, help="test")] -interf_outputs = [shell.out(name="test_out", type=File, callable=find_txt)] - - -Interf_1 = shell.define("testing", inputs=interf_inputs, outputs=interf_outputs) -Interf_2 = shell.define("testing command", inputs=interf_inputs, outputs=interf_outputs) - - -@shell.define -class Interf_3(ShellDef["Interf_3.Outputs"]): - """class with customized input and executables""" - - executable = ["testing", "command"] - - in_file: str = shell.arg(help="in_file", argstr="{in_file}") - - @shell.outputs - class Outputs(ShellOutputs): - pass - - -@shell.define -class TouchInterf(ShellDef["TouchInterf.Outputs"]): - """class with customized input and executables""" - - new_file: str = shell.outarg(help="new_file", argstr="", path_template="{new_file}") - executable = "touch" - - @shell.outputs - class Outputs(ShellOutputs): - pass - - -def test_interface_specs_1(): - """testing if class input/output definition are set properly""" - task_spec = Interf_1(executable="ls") - assert task.Outputs == Interf_1.Outputs - - -def test_interface_specs_2(): - """testing if class input/output definition are overwritten properly by the user's specs""" - my_input_spec = SpecInfo( - name="Input", - fields=[("my_inp", ty.Any, {"help": "my inp"})], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", fields=[("my_out", File, "*.txt")], bases=(ShellOutputs,) - ) - task = Interf_1(input_spec=my_input_spec, output_spec=my_output_spec) - assert task.input_spec == my_input_spec - assert task.output_spec == my_output_spec - - -def test_interface_executable_1(): - """testing if the class executable is properly set and used in the command line""" - task = Interf_2() - assert task.executable == "testing command" - assert task.definition.executable == "testing command" - assert task.cmdline == "testing command" - - -def test_interface_executable_2(): - """testing if the class executable is overwritten by the user's input (and if the warning is raised)""" - # warning that the user changes the executable from the one that is set as a class attribute - with pytest.warns(UserWarning, match="changing the executable"): - task = Interf_2(executable="i want a different command") - assert task.executable == "testing command" - # task.executable stays the same, but input.executable is changed, so the cmd is changed - assert task.definition.executable == "i want a different command" - assert task.cmdline == "i want a different command" - - -def test_interface_cmdline_with_spaces(): - task = Interf_3(in_file="/path/to/file/with spaces") - assert task.executable == "testing command" - assert task.definition.executable == "testing command" - assert task.cmdline == "testing command '/path/to/file/with spaces'" - - -def test_interface_run_1(): - """testing execution of a simple interf with customized input and executable""" - task = TouchInterf(new_file="hello.txt") - assert task.cmdline == "touch hello.txt" - res = task() - assert res.output.new_file.fspath.exists() diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 85c67eb8dd..51316324fd 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -77,7 +77,7 @@ def test_task_init_3( if input_type == "array": a_in = np.array(a_in) - nn = FunAddTwo(name="NA").split(splitter=splitter, a=a_in) + nn = FunAddTwo().split(splitter=splitter, a=a_in) assert 
np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == state_splitter diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index cec176deaf..08b3907081 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -16,44 +16,41 @@ @python.define(outputs=["b"]) -def arrayout(val): +def ArrayOut(val): return np.array([val, val]) def test_multiout(tmpdir): """testing a simple function that returns a numpy array""" - wf = Workflow("wf", input_spec=["val"], val=2) - wf.add(arrayout(name="mo", val=wf.lzin.val)) - wf.set_output([("array", wf.mo.lzout.b)]) - wf.cache_dir = tmpdir + @workflow.define(outputs=["array"]) + def Workflow(val): + mo = workflow.add(ArrayOut(val=val)) + return mo.b - with Submitter(worker="cf", n_procs=2) as sub: - sub(runnable=wf) + wf = Workflow(val=2) - results = wf.result(return_inputs=True) + with Submitter(worker="cf", cache_dir=tmpdir, n_procs=2) as sub: + results = sub(wf) - assert results[0] == {"wf.val": 2} - assert np.array_equal(results[1].output.array, np.array([2, 2])) + assert np.array_equal(results.outputs.array, np.array([2, 2])) def test_multiout_st(tmpdir): """testing a simple function that returns a numpy array, adding splitter""" - wf = Workflow("wf", input_spec=["val"], val=[0, 1, 2]) - wf.add(arrayout(name="mo")) - wf.mo.split("val", val=wf.lzin.val).combine("val") - wf.set_output([("array", wf.mo.lzout.b)]) - wf.cache_dir = tmpdir + @workflow.define(outputs=["array"]) + def Workflow(values): + mo = workflow.add(ArrayOut().split(val=values).combine("val")) + return mo.b - with Submitter(worker="cf", n_procs=2) as sub: - sub(runnable=wf) + wf = Workflow(values=[0, 1, 2]) - results = wf.result(return_inputs=True) + with Submitter(worker="cf", cache_dir=tmpdir, n_procs=2) as sub: + results = sub(wf) - assert results[0] == {"wf.val": [0, 1, 2]} for el in range(3): - assert np.array_equal(results[1].output.array[el], np.array([el, el])) + assert np.array_equal(results.outputs.array[el], np.array([el, el])) def test_numpy_hash_1(): @@ -81,20 +78,19 @@ def test_numpy_hash_3(): def test_task_numpyinput_1(tmp_path: Path): """task with numeric numpy array as an input""" - nn = Identity(name="NA") - nn.cache_dir = tmp_path - nn.split(x=[np.array([1, 2]), np.array([3, 4])]) + nn = Identity().split(x=[np.array([1, 2]), np.array([3, 4])]) # checking the results - results = nn() - assert (results[0].output.out == np.array([1, 2])).all() - assert (results[1].output.out == np.array([3, 4])).all() + outputs = nn(cache_dir=tmp_path) + assert (outputs.out[0] == np.array([1, 2])).all() + assert (outputs.out[1] == np.array([3, 4])).all() def test_task_numpyinput_2(tmp_path: Path): """task with numpy array of type object as an input""" - nn = Identity(name="NA") - nn.cache_dir = tmp_path - nn.split(x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)]) + nn = Identity().split( + x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)] + ) # checking the results - results = nn() - assert (results[0].output.out == np.array(["VAL1"], dtype=object)).all() + outputs = nn(cache_dir=tmp_path) + assert outputs.out[0] == np.array(["VAL1"], dtype=object) + assert outputs.out[1] == np.array(["VAL2"], dtype=object) diff --git a/pydra/engine/tests/test_profiles.py b/pydra/engine/tests/test_profiles.py index 19de274318..b8dbcaabe5 100644 --- a/pydra/engine/tests/test_profiles.py +++ b/pydra/engine/tests/test_profiles.py @@ -1,28 +1,29 @@ from ..helpers import 
load_task -from pydra.design import python +from pydra.design import python, workflow +from pydra.engine.core import Task +from pydra.engine.submitter import Submitter import numpy as np from pympler import asizeof from pytest import approx -def generate_list(l): - return np.arange(l).tolist() +def generate_list(n): + return np.arange(n).tolist() @python.define -def show_var(a): +def ShowVar(a): return a def create_wf(size): - wf = Workflow(name="wf", input_spec=["x"]) - wf.split("x", x=generate_list(size)) - wf.add(show_var(name="show", a=wf.lzin.x)) - wf.set_output([("out", wf.show.lzout.out)]) - wf.state.prepare_states(wf.inputs) - wf.state.prepare_inputs() - return wf + @workflow.define + def Workflow(x): + show = workflow.add(ShowVar(a=x)) + return show.out + + return Workflow().split(x=generate_list(size)) def test_wf_memory(): @@ -30,35 +31,15 @@ def test_wf_memory(): testings if the size of workflow grows linearly """ - wf_1000 = create_wf(size=1000) - wf_1000_mem = asizeof.asizeof(wf_1000) + wf_10000 = create_wf(size=10000) + wf_10000_mem = asizeof.asizeof(wf_10000) - wf_2000 = create_wf(size=2000) - wf_2000_mem = asizeof.asizeof(wf_2000) + wf_20000 = create_wf(size=20000) + wf_20000_mem = asizeof.asizeof(wf_20000) - wf_4000 = create_wf(size=4000) - wf_4000_mem = asizeof.asizeof(wf_4000) + wf_40000 = create_wf(size=40000) + wf_40000_mem = asizeof.asizeof(wf_40000) # checking if it's linear with the size of the splitter # check print(asizeof.asized(wf_4000, detail=2).format()) in case of problems - assert wf_4000_mem / wf_2000_mem == approx(2, 0.05) - assert wf_2000_mem / wf_1000_mem == approx(2, 0.05) - - -def test_load_task_memory(): - """creating two workflow with relatively big splitter: 1000 and 4000 elements - testings if load_task for a single element returns tasks of a similar size - """ - - wf_1000 = create_wf(size=1000) - wf_1000_pkl = wf_1000.pickle_task() - wf_1000_loaded = load_task(task_pkl=wf_1000_pkl, ind=1) - wf_1000_single_mem = asizeof.asizeof(wf_1000_loaded) - - wf_4000 = create_wf(size=4000) - wf_4000_pkl = wf_4000.pickle_task() - wf_4000_loaded = load_task(task_pkl=wf_4000_pkl, ind=1) - wf_4000_single_mem = asizeof.asizeof(wf_4000_loaded) - - # checking if it doesn't change with size of the splitter - # check print(asizeof.asized(wf_4000_loaded, detail=2).format()) in case of problems - assert wf_1000_single_mem / wf_4000_single_mem == approx(1, 0.05) + assert wf_40000_mem / wf_20000_mem == approx(2, 0.05) + assert wf_20000_mem / wf_10000_mem == approx(2, 0.05) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 90bfeb004f..950d68b8a3 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -2,6 +2,7 @@ import typing as ty import os import attrs +from unittest.mock import Mock # from copy import deepcopy import time @@ -9,7 +10,6 @@ from ..specs import ( Runtime, Result, - ShellDef, ) from pydra.engine.lazy import ( LazyInField, @@ -19,12 +19,14 @@ from pydra.utils.typing import StateArray # from ..helpers import make_klass -from .utils import Foo +from .utils import Foo, BasicWorkflow from pydra.design import python, workflow import pytest -make_klass = lambda x: x +# @python.define +# def Foo(a: str, b: int, c: float) -> str: +# return f"{a}{b}{c}" def test_runtime(): @@ -34,22 +36,14 @@ def test_runtime(): assert hasattr(runtime, "cpu_peak_percent") -def test_result(): - result = Result() +def test_result(tmp_path): + result = Result(output_dir=tmp_path) assert hasattr(result, "runtime") - 
assert hasattr(result, "output") + assert hasattr(result, "outputs") assert hasattr(result, "errored") assert getattr(result, "errored") is False -def test_shellspec(): - with pytest.raises(TypeError): - definition = ShellDef() - definition = ShellDef(executable="ls") # (executable, args) - assert hasattr(definition, "executable") - assert hasattr(definition, "args") - - class NodeTesting: @attrs.define() class Input: @@ -99,20 +93,33 @@ def __init__(self): self.tn = NodeTesting() -def test_lazy_inp(): - tn = NodeTesting() - lzin = LazyIn(task=tn) +@pytest.fixture +def mock_node(): + node = Mock() + node.name = "tn" + node.definition = Foo(a="a", b=1, c=2.0) + return node + - lf = lzin.inp_a - assert lf.get_value(wf=WorkflowTesting()) == "A" +@pytest.fixture +def mock_workflow(): + mock_workflow = Mock() + mock_workflow.inputs = BasicWorkflow(x=1) + mock_workflow.outputs = BasicWorkflow.Outputs(out=attrs.NOTHING) + return mock_workflow - lf = lzin.inp_b - assert lf.get_value(wf=WorkflowTesting()) == "B" + +def test_lazy_inp(mock_workflow): + lf = LazyInField(field="a", type=int, workflow=mock_workflow) + assert lf._get_value() == "a" + + lf = LazyInField(field="b", type=str, workflow_def=mock_workflow) + assert lf._get_value() == 1 def test_lazy_out(): tn = NodeTesting() - lzout = LazyOut(task=tn) + lzout = LazyOutField(task=tn) lf = lzout.out_a assert lf.get_value(wf=WorkflowTesting()) == "OUT_A" @@ -364,10 +371,10 @@ def f(x: ty.List[int]) -> ty.List[int]: def test_lazy_field_multi_diff_split(): @python.define - def f(x: ty.Any, y: ty.Any) -> ty.Any: + def F(x: ty.Any, y: ty.Any) -> ty.Any: return x - task = f(x=[1, 2, 3], name="foo") + task = F(x=[1, 2, 3], name="foo") lf = task.lzout.out.split("foo.x") @@ -383,18 +390,22 @@ def f(x: ty.Any, y: ty.Any) -> ty.Any: assert lf3.splits == set([(("foo.x",),), (("foo.y",),)]) -def test_wf_lzin_split(): +def test_wf_lzin_split(tmp_path): @python.define def identity(x: int) -> int: return x - inner = Workflow(name="inner", input_spec=["x"]) - inner.add(identity(x=inner.lzin.x, name="f")) - inner.set_output(("out", inner.f.lzout.out)) + @workflow.define + def Inner(x): + ident = workflow.add(identity(x=x)) + return ident.out + + @workflow.define + def Outer(xs): + inner = workflow.add(Inner().split(x=xs)) + return inner.out - outer = Workflow(name="outer", input_spec=["x"]) - outer.add(inner.split(x=outer.lzin.x)) - outer.set_output(("out", outer.inner.lzout.out)) + outer = Outer(xs=[1, 2, 3]) - outputs = outer(x=[1, 2, 3]) - assert outputs.out == StateArray([1, 2, 3]) + outputs = outer(cache_dir=tmp_path) + assert outputs.out == [1, 2, 3] From ffeece07009a68c32828bc29fb771b5f463acb23 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 19:19:32 +1100 Subject: [PATCH 276/342] debugged singularity tests --- pydra/engine/environments.py | 9 ++++----- pydra/engine/helpers_file.py | 2 +- pydra/engine/tests/test_environments.py | 4 ++-- pydra/engine/tests/test_singularity.py | 20 +++++++++++--------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index c32c35f2df..04f90d49e1 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -147,6 +147,10 @@ def get_bindings( if isinstance(fileset, os.PathLike) else tuple(env_path / rel for rel in fileset.relative_fspaths) ) + + # Add the cache directory to the list of mounts + bindings[task.cache_dir] = (f"{self.root}/{task.cache_dir}", "rw") + return bindings, input_updates @@ -158,9 +162,6 @@ def 
execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: # mounting all input locations mounts, input_updates = self.get_bindings(task=task, root=self.root) - # add the cache directory to the list of mounts - mounts[task.cache_dir] = (f"{self.root}{task.cache_dir}", "rw") - docker_args = [ "docker", "run", @@ -202,8 +203,6 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_args = [ "singularity", "exec", - "-B", - self.bind(task.cache_dir, "rw"), *self.xargs, ] singularity_args.extend( diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 807778e757..5be17047b7 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -269,7 +269,7 @@ def _single_template_formatting( # inp_fields = set(re.findall(r"{(\w+)(?:\.\w+)?(?::[0-9.]+f)?}", template)) if len(inp_fields) == 0: - return template + return Path(template) val_dict = {} file_template = None diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py index d18f0215da..d0cbd7f63a 100644 --- a/pydra/engine/tests/test_environments.py +++ b/pydra/engine/tests/test_environments.py @@ -131,7 +131,7 @@ def newcache(x): makedir(tmp_path, x) cmd = "whoami" - sing = Singularity(image="docker://alpine") + sing = Singularity(image="docker://alpine", xargs=["--fakeroot"]) Shelly = shell.define(cmd) shelly = Shelly() shelly_job = Task( @@ -159,7 +159,7 @@ def newcache(x): makedir(tmp_path, x) cmd = "whoami" - sing = Singularity(image="docker://alpine") + sing = Singularity(image="docker://alpine", xargs=["--fakeroot"]) Shelly = shell.define(cmd) shelly = Shelly() shelly_job = Task( diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index e2487a7660..30a934e014 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -70,6 +70,7 @@ def test_singularity_2(plugin, tmp_path): worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path ) as sub: res = sub(singu) + assert not res.errored, "\n".join(res.errors["error message"]) assert res.outputs.stdout.strip() == " ".join(cmd[1:]) assert res.outputs.return_code == 0 @@ -110,11 +111,13 @@ def test_singularity_st_1(plugin, tmp_path): singu = Singu().split("executable", executable=cmd) outputs = singu( - plugin=plugin, environment=Singularity(image=image), cache_dir=tmp_path + plugin=plugin, + environment=Singularity(image=image, xargs=["--fakeroot"]), + cache_dir=tmp_path, ) - assert outputs.stdout[0] == "root" - assert outputs.stdout[1] == "/mnt/pydra" - assert outputs.stdout[2] == "" + assert outputs.stdout[0].strip() == "root" + assert "/mnt/pydra" in outputs.stdout[1] + assert outputs.stdout[2].strip() == "_task.pklz" assert outputs.return_code == [0, 0, 0] @@ -161,11 +164,10 @@ def test_singularity_outputspec_1(plugin, tmp_path): ) singu = Singu() - with Submitter( - worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path - ) as sub: + with Submitter(environment=Singularity(image=image), cache_dir=tmp_path) as sub: res = sub(singu) + assert not res.errored, "\n".join(res.errors["error message"]) assert res.outputs.stdout == "" assert res.outputs.newfile.fspath.exists() @@ -386,7 +388,7 @@ class Singu(ShellDef["Singu.Outputs"]): class Outputs(ShellOutputs): out_file: File = shell.outarg( - path_template="{orig_file}", + path_template="{orig_file}.txt", # FIXME: Shouldn't have to specify the extension help="output file", ) @@ -396,7 +398,7 @@ class Outputs(ShellOutputs): assert 
outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert outputs.out_file.fspath.parent == singu.output_dir + assert outputs.out_file.fspath.parent.parent == tmp_path with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged From 3b8f818beff250caecfcf18c021313d11703d09a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 19:37:13 +1100 Subject: [PATCH 277/342] made imports in pydra.design.* modules absolute --- pydra/design/boutiques.py | 4 ++-- pydra/design/python.py | 2 +- pydra/design/shell.py | 2 +- pydra/design/workflow.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 9a48edfd17..334552b5b1 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -6,8 +6,8 @@ from functools import reduce from fileformats.generic import File from pydra.engine.specs import ShellDef -from .base import make_task_def -from . import shell +from pydra.design.base import make_task_def +from pydra.design import shell class arg(shell.arg): diff --git a/pydra/design/python.py b/pydra/design/python.py index f41a4e0106..095404f41b 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -2,7 +2,7 @@ import inspect from typing import dataclass_transform import attrs -from .base import ( +from pydra.design.base import ( Arg, Out, ensure_field_objects, diff --git a/pydra/design/shell.py b/pydra/design/shell.py index a1a39e4972..1f0e75543c 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -13,7 +13,7 @@ from fileformats import generic from fileformats.core.exceptions import FormatRecognitionError from pydra.engine.helpers import attrs_values -from .base import ( +from pydra.design.base import ( Arg, Out, check_explicit_fields_are_none, diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index e9d5af3ec2..119cd95918 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -2,7 +2,7 @@ import inspect from typing import dataclass_transform import attrs -from .base import ( +from pydra.design.base import ( Arg, Out, ensure_field_objects, From 6aaf1b7f6ab1b34e40bb61b0ff169ffd4c2fea6f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 19:47:39 +1100 Subject: [PATCH 278/342] added fileformats-extras to test deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index a7b7de2e35..868a488d58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ test = [ "pytest-rerunfailures", "pytest-timeout", "codecov", + "fileformats-extras >=0.15a4", "numpy", "pyld", "psutil", From e6724c1a29a9169c5f82ed4ed4ec23aa547148e0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 25 Feb 2025 21:44:56 +1100 Subject: [PATCH 279/342] fetch tags in github actions so installed version is correct --- .github/workflows/testdask.yml | 3 ++- .github/workflows/testpsijlocal.yml | 3 ++- .github/workflows/testpsijslurm.yml | 2 ++ .github/workflows/testpydra.yml | 3 ++- .github/workflows/testsingularity.yml | 2 ++ .github/workflows/testslurm.yml | 2 ++ 6 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml index 0e9f3d447c..e14ba1f405 100644 --- a/.github/workflows/testdask.yml +++ b/.github/workflows/testdask.yml @@ -28,7 +28,8 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} - + - name: Fetch tags + run: git 
fetch --prune --unshallow - name: Setup Python version ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/testpsijlocal.yml b/.github/workflows/testpsijlocal.yml index d99966bf5e..3ad359c505 100644 --- a/.github/workflows/testpsijlocal.yml +++ b/.github/workflows/testpsijlocal.yml @@ -28,7 +28,8 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} - + - name: Fetch tags + run: git fetch --prune --unshallow - name: Setup Python version ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml index cce4d9c242..b0fe551ba3 100644 --- a/.github/workflows/testpsijslurm.yml +++ b/.github/workflows/testpsijslurm.yml @@ -25,6 +25,8 @@ jobs: - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Pull docker image run: | docker pull $DOCKER_IMAGE diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index c1dfb352cc..9865b73137 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -59,7 +59,8 @@ jobs: steps: - name: Fetch repository uses: actions/checkout@v4 - + - name: Fetch tags + run: git fetch --prune --unshallow - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 5b43fdd771..c989334176 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -65,6 +65,8 @@ jobs: uses: actions/checkout@v4 with: repository: ${{ github.repository }} + - name: Fetch tags + run: git fetch --prune --unshallow - name: Install pydra (test) run: pip install -e ".[test]" diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index 1eaeb53d57..3e715a127b 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -25,6 +25,8 @@ jobs: - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - uses: actions/checkout@v4 + - name: Fetch tags + run: git fetch --prune --unshallow - name: Pull docker image run: | docker pull $DOCKER_IMAGE From f7021e595dc17e4e9245564c734c09fa7aab7bd6 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 10:18:36 +1100 Subject: [PATCH 280/342] fixed bug in state depth calculation where, single, combined states were being included --- pydra/engine/state.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 96c2881189..7ef22aa848 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -190,13 +190,13 @@ def names(self): @property def depth(self) -> int: - """Return the number of uncombined splits of the state, i.e. the number nested + """Return the number of splits of the state, i.e. the number nested state arrays to wrap around the type of lazy out fields Returns ------- int - number of uncombined splits + number of uncombined independent splits (i.e. 
linked splits only add 1) """ depth = 0 stack = [] @@ -210,7 +210,8 @@ def depth(self) -> int: stack = [] else: stack.append(spl) - return depth + len(stack) + remaining_stack = [s for s in stack if s not in self.combiner] + return depth + len(remaining_stack) @property def splitter(self): From b2034d56f7d5681b2a3fbe6d47238344c3e05d79 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 10:20:06 +1100 Subject: [PATCH 281/342] moved resolved lazy inputs into NodeExecution class from TaskDef --- pydra/engine/lazy.py | 48 ++++++++--- pydra/engine/node.py | 36 ++++---- pydra/engine/specs.py | 37 -------- pydra/engine/submitter.py | 39 +++++++-- pydra/engine/tests/test_specs.py | 144 ++++++++++++------------------- pydra/engine/tests/utils.py | 4 +- pydra/utils/typing.py | 10 +-- 7 files changed, 148 insertions(+), 170 deletions(-) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index f668acfc1b..936d47ead5 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -6,10 +6,9 @@ from . import node if ty.TYPE_CHECKING: - from .graph import DiGraph from .submitter import NodeExecution from .core import Task, Workflow - from .specs import TaskDef, WorkflowDef + from .specs import TaskDef from .state import StateIndex @@ -46,6 +45,27 @@ def _apply_cast(self, value): value = self._type(value) return value + def _get_value( + self, + node_exec: "NodeExecution", + state_index: "StateIndex | None" = None, + ) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + node_exec: NodeExecution + the object representing the execution state of the current node + state_index : StateIndex, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + raise NotImplementedError("LazyField is an abstract class") + @attrs.define(kw_only=True) class LazyInField(LazyField[T]): @@ -70,23 +90,25 @@ def _source(self): def _get_value( self, - workflow_def: "WorkflowDef", + node_exec: "NodeExecution", + state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. Parameters ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional - the state index of the field to access + node_exec: NodeExecution + the object representing the execution state of the current node + state_index : StateIndex, optional + the state index of the field to access (ignored, used for duck-typing with + LazyOutField) Returns ------- value : Any the resolved value of the lazy-field """ - value = workflow_def[self._field] + value = node_exec.workflow_inputs[self._field] value = self._apply_cast(value) return value @@ -105,16 +127,16 @@ def __repr__(self): def _get_value( self, - graph: "DiGraph[NodeExecution]", + node_exec: "NodeExecution", state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. 
Parameters ---------- - wf : Workflow - the workflow the lazy field references - state_index : int, optional + node_exec: NodeExecution + the object representing the execution state of the current node + state_index : StateIndex, optional the state index of the field to access Returns @@ -130,7 +152,7 @@ def _get_value( if state_index is None: state_index = StateIndex() - task = graph.node(self._node.name).task(state_index) + task = node_exec.graph.node(self._node.name).task(state_index) _, split_depth = TypeParser.strip_splits(self._type) def get_nested(task: "Task[DefType]", depth: int): diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 2598a08fb6..156478b7e1 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -121,14 +121,28 @@ def lzout(self) -> OutputType: type=field.type, ) outputs = self.inputs.Outputs(**lazy_fields) - # Flag the output lazy fields as being not typed checked (i.e. assigned to another - # node's inputs) yet + outpt: lazy.LazyOutField for outpt in attrs_values(outputs).values(): - outpt._type_checked = False + # Assign the current node to the lazy fields so they can access the state outpt._node = self + # If the node has a non-empty state, wrap the type of the lazy field in + # a combination of an optional list and a number of nested StateArrays + # types based on the number of states the node is split over and whether + # it has a combiner + if self._state: + type_, _ = TypeParser.strip_splits(outpt._type) + if self._state.combiner: + type_ = list[type_] + for _ in range(self._state.depth - int(bool(self._state.combiner))): + type_ = StateArray[type_] + outpt._type = type_ + # Flag the output lazy fields as being not typed checked (i.e. assigned to + # another node's inputs) yet. This is used to prevent the user from changing + # the type of the output after it has been accessed by connecting it to an + # output of an upstream node with additional state variables. 
+ outpt._type_checked = False self._lzout = outputs - self._wrap_lzout_types_in_state_arrays() return outputs @property @@ -217,20 +231,6 @@ def _check_if_outputs_have_been_used(self, msg): + msg ) - def _wrap_lzout_types_in_state_arrays(self) -> None: - """Wraps a types of the lazy out fields in a number of nested StateArray types - based on the number of states the node is split over""" - # Unwrap StateArray types from the output types - if not self.state: - return - outpt_lf: lazy.LazyOutField - for outpt_lf in attrs_values(self.lzout).values(): - assert not outpt_lf._type_checked - type_, _ = TypeParser.strip_splits(outpt_lf._type) - for _ in range(self._state.depth): - type_ = StateArray[type_] - outpt_lf._type = type_ - def _set_state(self) -> None: # Add node name to state's splitter, combiner and cont_dim loaded from the def splitter = self._definition._splitter diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index cd7f43b4ae..d92ce66434 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -35,14 +35,12 @@ from pydra.utils.typing import StateArray, MultiInputObj from pydra.design.base import Field, Arg, Out, RequirementSet, NO_DEFAULT from pydra.design import shell -from pydra.engine.lazy import LazyInField, LazyOutField if ty.TYPE_CHECKING: from pydra.engine.core import Task from pydra.engine.graph import DiGraph from pydra.engine.submitter import NodeExecution from pydra.engine.core import Workflow - from pydra.engine.state import StateIndex from pydra.engine.environments import Environment from pydra.engine.workers import Worker @@ -476,41 +474,6 @@ def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: } return hash_function(sorted(field_hashes.items())), field_hashes - def _resolve_lazy_inputs( - self, - workflow_inputs: "WorkflowDef", - graph: "DiGraph[NodeExecution]", - state_index: "StateIndex | None" = None, - ) -> Self: - """Resolves lazy fields in the task definition by replacing them with their - actual values. 
- - Parameters - ---------- - workflow : Workflow - The workflow the task is part of - graph : DiGraph[NodeExecution] - The execution graph of the workflow - state_index : StateIndex, optional - The state index for the workflow, by default None - - Returns - ------- - Self - The task definition with all lazy fields resolved - """ - from pydra.engine.state import StateIndex - - if state_index is None: - state_index = StateIndex() - resolved = {} - for name, value in attrs_values(self).items(): - if isinstance(value, LazyInField): - resolved[name] = value._get_value(workflow_inputs) - elif isinstance(value, LazyOutField): - resolved[name] = value._get_value(graph, state_index) - return attrs.evolve(self, **resolved) - def _check_rules(self): """Check if all rules are satisfied.""" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index c9433e2d21..a869c10272 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -9,15 +9,18 @@ from copy import copy from datetime import datetime from collections import defaultdict +import attrs from .workers import Worker, WORKERS from .graph import DiGraph from .helpers import ( get_open_loop, list_fields, + attrs_values, ) from pydra.utils.hash import PersistentCache from .state import StateIndex from pydra.utils.typing import StateArray +from pydra.engine.lazy import LazyField from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger @@ -607,10 +610,7 @@ def all_failed(self) -> bool: def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: if not self.node.state: yield Task( - definition=self.node._definition._resolve_lazy_inputs( - workflow_inputs=self.workflow_inputs, - graph=self.graph, - ), + definition=self._resolve_lazy_inputs(task_def=self.node._definition), submitter=self.submitter, environment=self.node._environment, hooks=self.node._hooks, @@ -619,9 +619,8 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: else: for index, split_defn in self.node._split_definition().items(): yield Task( - definition=split_defn._resolve_lazy_inputs( - workflow_inputs=self.workflow_inputs, - graph=self.graph, + definition=self._resolve_lazy_inputs( + task_def=split_defn, state_index=index, ), submitter=self.submitter, @@ -631,6 +630,32 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: state_index=index, ) + def _resolve_lazy_inputs( + self, + task_def: "TaskDef", + state_index: "StateIndex | None" = None, + ) -> "TaskDef": + """Resolves lazy fields in the task definition by replacing them with their + actual values calculated by upstream jobs. + + Parameters + ---------- + task_def : TaskDef + The definition to resolve the lazy fields of + state_index : StateIndex, optional + The state index for the workflow, by default None + + Returns + ------- + TaskDef + The task definition with all lazy fields resolved + """ + resolved = {} + for name, value in attrs_values(self).items(): + if isinstance(value, LazyField): + resolved[name] = value._get_value(self, state_index) + return attrs.evolve(self, **resolved) + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready to run, can't be run due to upstream errors, or are blocked on other tasks to complete. 
diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 950d68b8a3..20b410026e 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -1,13 +1,11 @@ from pathlib import Path import typing as ty import os -import attrs from unittest.mock import Mock - -# from copy import deepcopy import time +import pytest from fileformats.generic import File -from ..specs import ( +from pydra.engine.specs import ( Runtime, Result, ) @@ -15,113 +13,68 @@ LazyInField, LazyOutField, ) - +from pydra.engine.core import Workflow +from pydra.engine.submitter import NodeExecution from pydra.utils.typing import StateArray - -# from ..helpers import make_klass -from .utils import Foo, BasicWorkflow from pydra.design import python, workflow -import pytest - - -# @python.define -# def Foo(a: str, b: int, c: float) -> str: -# return f"{a}{b}{c}" - - -def test_runtime(): - runtime = Runtime() - assert hasattr(runtime, "rss_peak_gb") - assert hasattr(runtime, "vms_peak_gb") - assert hasattr(runtime, "cpu_peak_percent") - - -def test_result(tmp_path): - result = Result(output_dir=tmp_path) - assert hasattr(result, "runtime") - assert hasattr(result, "outputs") - assert hasattr(result, "errored") - assert getattr(result, "errored") is False +from .utils import Foo, FunAddTwo, FunAddVar, ListSum -class NodeTesting: - @attrs.define() - class Input: - inp_a: str = "A" - inp_b: str = "B" +@workflow.define +def TestWorkflow(x: int, y: list[int]) -> int: + node_a = workflow.add(FunAddTwo(a=x), name="A") + node_b = workflow.add(FunAddVar(a=node_a.out).split(b=y).combine("b"), name="B") + node_c = workflow.add(ListSum(x=node_b.out), name="C") + return node_c.out - def __init__(self): - class InpDef: - def __init__(self): - self.fields = [("inp_a", int), ("inp_b", int)] - class Outputs: - def __init__(self): - self.fields = [("out_a", int)] - - self.name = "tn" - self.inputs = self.Input() - self.input_spec = InpDef() - self.output_spec = Outputs() - self.output_names = ["out_a"] - self.state = None - - def result(self, state_index=None): - class Output: - def __init__(self): - self.out_a = "OUT_A" +@pytest.fixture +def workflow_task(): + return TestWorkflow(x=1, y=[1, 2, 3]) - class Result: - def __init__(self): - self.output = Output() - self.errored = False - def get_output_field(self, field): - return getattr(self.output, field) +@pytest.fixture +def workflow_obj(workflow_task): + wf = Workflow.construct(workflow_task) + for n in wf.nodes: + if n._state: + n._state.prepare_states() + n._state.prepare_inputs() + return wf - return Result() +@pytest.fixture +def node_a(workflow_obj): + return workflow_obj["A"] -class WorkflowTesting: - def __init__(self): - class Input: - def __init__(self): - self.inp_a = "A" - self.inp_b = "B" - self.inputs = Input() - self.tn = NodeTesting() +@pytest.fixture +def node_b(workflow_obj): + return workflow_obj["B"] @pytest.fixture -def mock_node(): - node = Mock() - node.name = "tn" - node.definition = Foo(a="a", b=1, c=2.0) - return node +def node_c(workflow_obj): + return workflow_obj["C"] @pytest.fixture -def mock_workflow(): - mock_workflow = Mock() - mock_workflow.inputs = BasicWorkflow(x=1) - mock_workflow.outputs = BasicWorkflow.Outputs(out=attrs.NOTHING) - return mock_workflow +def node_exec(node_c, workflow_task): + # We only use this to resolve the upstream outputs from, can be any node + return NodeExecution(node=node_c, workflow_inputs=workflow_task, submitter=Mock()) -def test_lazy_inp(mock_workflow): - lf = 
LazyInField(field="a", type=int, workflow=mock_workflow) - assert lf._get_value() == "a" +def test_lazy_inp(workflow_obj, node_exec): + lf = LazyInField(field="x", type=int, workflow=workflow_obj) + assert lf._get_value(node_exec) == 1 - lf = LazyInField(field="b", type=str, workflow_def=mock_workflow) - assert lf._get_value() == 1 + lf = LazyInField(field="y", type=str, workflow=workflow_obj) + assert lf._get_value(node_exec) == [1, 2, 3] -def test_lazy_out(): - tn = NodeTesting() - lzout = LazyOutField(task=tn) - lf = lzout.out_a - assert lf.get_value(wf=WorkflowTesting()) == "OUT_A" +def test_lazy_out(node_a, node_exec): + lf = LazyOutField(field="a", type=int, node=node_a) + assert lf._get_value(node_exec) == 3 def test_lazy_getvale(): @@ -409,3 +362,18 @@ def Outer(xs): outputs = outer(cache_dir=tmp_path) assert outputs.out == [1, 2, 3] + + +def test_runtime(): + runtime = Runtime() + assert hasattr(runtime, "rss_peak_gb") + assert hasattr(runtime, "vms_peak_gb") + assert hasattr(runtime, "cpu_peak_percent") + + +def test_result(tmp_path): + result = Result(output_dir=tmp_path) + assert hasattr(result, "runtime") + assert hasattr(result, "outputs") + assert hasattr(result, "errored") + assert getattr(result, "errored") is False diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 9fc1d5f91f..47ba21e4ce 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -290,8 +290,8 @@ def FunFileList(filename_list: ty.List[File]): @workflow.define(outputs=["out"]) def BasicWorkflow(x): - task1 = workflow.add(FunAddTwo(a=x)) - task2 = workflow.add(FunAddVar(a=task1.out, b=2)) + task1 = workflow.add(FunAddTwo(a=x), name="A") + task2 = workflow.add(FunAddVar(a=task1.out, b=2), name="B") return task2.out diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 7e39fc541b..6c538efaa8 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -7,7 +7,7 @@ import types import typing as ty import logging -import attr +import attrs from pydra.utils import add_exc_note from fileformats import field, core, generic @@ -217,8 +217,8 @@ def __call__(self, obj: ty.Any) -> T: from pydra.engine.helpers import is_lazy coerced: T - if obj is attr.NOTHING: - coerced = attr.NOTHING # type: ignore[assignment] + if obj is attrs.NOTHING: + coerced = attrs.NOTHING # type: ignore[assignment] elif is_lazy(obj): try: self.check_type(obj._type) @@ -279,8 +279,8 @@ def coerce(self, object_: ty.Any) -> T: def expand_and_coerce(obj, pattern: ty.Union[type, tuple]): """Attempt to expand the object along the lines of the coercion pattern""" - if obj is attr.NOTHING: - return attr.NOTHING + if obj is attrs.NOTHING: + return attrs.NOTHING if not isinstance(pattern, tuple): return coerce_basic(obj, pattern) origin, pattern_args = pattern From db8f7992a929b26af94bd4e73f04d65ab4e4cae7 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 14:26:06 +1100 Subject: [PATCH 282/342] finished debugging test_specs --- pydra/engine/core.py | 4 +- pydra/engine/lazy.py | 36 +++-- pydra/engine/node.py | 6 +- pydra/engine/specs.py | 2 +- pydra/engine/state.py | 51 ++++++- pydra/engine/submitter.py | 69 ++++++--- pydra/engine/tests/test_specs.py | 248 ++++++++++++------------------- 7 files changed, 220 insertions(+), 196 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 9b7af85008..9b0ac65d79 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -817,9 +817,7 @@ def node_names(self) -> list[str]: def execution_graph(self, submitter: 
"Submitter") -> DiGraph: from pydra.engine.submitter import NodeExecution - exec_nodes = [ - NodeExecution(n, submitter, workflow_inputs=self.inputs) for n in self.nodes - ] + exec_nodes = [NodeExecution(n, submitter, workflow=self) for n in self.nodes] graph = self._create_graph(exec_nodes) # Set the graph attribute of the nodes so lazy fields can be resolved as tasks # are created diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 936d47ead5..fd1f628a24 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -6,7 +6,7 @@ from . import node if ty.TYPE_CHECKING: - from .submitter import NodeExecution + from .submitter import DiGraph, NodeExecution from .core import Task, Workflow from .specs import TaskDef from .state import StateIndex @@ -47,15 +47,18 @@ def _apply_cast(self, value): def _get_value( self, - node_exec: "NodeExecution", + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. Parameters ---------- - node_exec: NodeExecution - the object representing the execution state of the current node + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow state_index : StateIndex, optional the state index of the field to access @@ -90,25 +93,27 @@ def _source(self): def _get_value( self, - node_exec: "NodeExecution", + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. Parameters ---------- - node_exec: NodeExecution - the object representing the execution state of the current node + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow state_index : StateIndex, optional - the state index of the field to access (ignored, used for duck-typing with - LazyOutField) + the state index of the field to access Returns ------- value : Any the resolved value of the lazy-field """ - value = node_exec.workflow_inputs[self._field] + value = workflow.inputs[self._field] value = self._apply_cast(value) return value @@ -127,15 +132,18 @@ def __repr__(self): def _get_value( self, - node_exec: "NodeExecution", + workflow: "Workflow", + graph: "DiGraph[NodeExecution]", state_index: "StateIndex | None" = None, ) -> ty.Any: """Return the value of a lazy field. 
Parameters ---------- - node_exec: NodeExecution - the object representing the execution state of the current node + workflow: Workflow + the workflow object + graph: DiGraph[NodeExecution] + the graph representing the execution state of the workflow state_index : StateIndex, optional the state index of the field to access @@ -152,7 +160,7 @@ def _get_value( if state_index is None: state_index = StateIndex() - task = node_exec.graph.node(self._node.name).task(state_index) + task = graph.node(self._node.name).task(state_index) _, split_depth = TypeParser.strip_splits(self._type) def get_nested(task: "Task[DefType]", depth: int): diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 156478b7e1..8fd3bf0415 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -269,7 +269,11 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: """Get the states of the upstream nodes that are connected to this node""" upstream_states = {} for inpt_name, val in self.input_values: - if isinstance(val, lazy.LazyOutField) and val._node.state: + if ( + isinstance(val, lazy.LazyOutField) + and val._node.state + and val._node.state.depth + ): node: Node = val._node # variables that are part of inner splitters should be treated as a containers if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index d92ce66434..baea685f14 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -736,7 +736,7 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: nodes_dict = {n.name: n for n in exec_graph.nodes} for name, lazy_field in attrs_values(workflow.outputs).items(): try: - val_out = lazy_field._get_value(exec_graph) + val_out = lazy_field._get_value(workflow=workflow, graph=exec_graph) output_wf[name] = val_out except (ValueError, AttributeError): output_wf[name] = None diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 7ef22aa848..11a4290bcf 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -41,7 +41,13 @@ def __init__(self, indices: dict[str, int] | None = None): else: self.indices = OrderedDict(sorted(indices.items())) - def __repr__(self): + def __len__(self) -> int: + return len(self.indices) + + def __iter__(self) -> ty.Generator[str, None, None]: + return iter(self.indices) + + def __repr__(self) -> str: return ( "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")" ) @@ -49,15 +55,49 @@ def __repr__(self): def __hash__(self): return hash(tuple(self.indices.items())) - def __eq__(self, other): + def __eq__(self, other) -> bool: return self.indices == other.indices - def __str__(self): + def __str__(self) -> str: return "__".join(f"{n}-{i}" for n, i in self.indices.items()) - def __bool__(self): + def __bool__(self) -> bool: return bool(self.indices) + def subset(self, state_names: ty.Iterable[str]) -> ty.Self: + """Create a new StateIndex with only the specified fields + + Parameters + ---------- + fields : list[str] + the fields to keep in the new StateIndex + + Returns + ------- + StateIndex + a new StateIndex with only the specified fields + """ + return type(self)({k: v for k, v in self.indices.items() if k in state_names}) + + def matches(self, other: "StateIndex") -> bool: + """Check if the indices that are present in the other StateIndex match + + Parameters + ---------- + other : StateIndex + the other StateIndex to compare against + + Returns + ------- + bool + True if all the indices in the other StateIndex match + """ + if not 
set(self.indices).issuperset(other.indices): + raise ValueError( + f"StateIndex {self} does not contain all the indices in {other}" + ) + return all(self.indices[k] == v for k, v in other.indices.items()) + class State: """ @@ -172,6 +212,9 @@ def __str__(self): def names(self): """Return the names of the states.""" # analysing states from connected tasks if inner_inputs + if not hasattr(self, "keys_final"): + self.prepare_states() + self.prepare_inputs() previous_states_keys = { f"_{v.name}": v.keys_final for v in self.inner_inputs.values() } diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index a869c10272..3236306a57 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -32,7 +32,8 @@ if ty.TYPE_CHECKING: from .node import Node - from .specs import TaskDef, TaskOutputs, WorkflowDef, TaskHooks, Result + from .specs import WorkflowDef, TaskDef, TaskOutputs, TaskHooks, Result + from .core import Workflow from .environments import Environment from .state import State @@ -501,7 +502,7 @@ class NodeExecution(ty.Generic[DefType]): _tasks: dict[StateIndex | None, "Task[DefType]"] | None - workflow_inputs: "WorkflowDef" + workflow: "Workflow" graph: DiGraph["NodeExecution"] | None @@ -509,7 +510,7 @@ def __init__( self, node: "Node", submitter: Submitter, - workflow_inputs: "WorkflowDef", + workflow: "Workflow", ): self.name = node.name self.node = node @@ -523,9 +524,17 @@ def __init__( self.running = {} # Not used in logic, but may be useful for progress tracking self.unrunnable = defaultdict(list) self.state_names = self.node.state.names if self.node.state else [] - self.workflow_inputs = workflow_inputs + self.workflow = workflow self.graph = None + def __repr__(self): + return ( + f"NodeExecution(name={self.name!r}, blocked={list(self.blocked)}, " + f"queued={list(self.queued)}, running={list(self.running)}, " + f"successful={list(self.successful)}, errored={list(self.errored)}, " + f"unrunnable={list(self.unrunnable)})" + ) + @property def inputs(self) -> "Node.Inputs": return self.node.inputs @@ -547,12 +556,16 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: def task(self, index: StateIndex = StateIndex()) -> "Task | list[Task[DefType]]": """Get a task object for a given state index.""" self.tasks # Ensure tasks are loaded - try: - return self._tasks[index] - except KeyError: - if not index: - return StateArray(self._tasks.values()) - raise + task_index = next(iter(self._tasks)) + if len(task_index) > len(index): + tasks = [] + for ind, task in self._tasks.items(): + if ind.matches(index): + tasks.append(task) + return StateArray(tasks) + elif len(index) > len(task_index): + index = index.subset(task_index) + return self._tasks[index] @property def started(self) -> bool: @@ -651,10 +664,12 @@ def _resolve_lazy_inputs( The task definition with all lazy fields resolved """ resolved = {} - for name, value in attrs_values(self).items(): + for name, value in attrs_values(task_def).items(): if isinstance(value, LazyField): - resolved[name] = value._get_value(self, state_index) - return attrs.evolve(self, **resolved) + resolved[name] = value._get_value( + workflow=self.workflow, graph=self.graph, state_index=state_index + ) + return attrs.evolve(task_def, **resolved) def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready @@ -676,19 +691,35 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: runnable: list["Task[DefType]"] = [] 
self.tasks # Ensure tasks are loaded if not self.started: + assert self._tasks self.blocked = copy(self._tasks) # Check to see if any blocked tasks are now runnable/unrunnable for index, task in list(self.blocked.items()): pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: - if index not in pred.successful: + pred_jobs = pred.task(index) + if isinstance(pred_jobs, StateArray): + pred_inds = [j.state_index for j in pred_jobs] + else: + pred_inds = [pred_jobs.state_index] + if not all(i in pred.successful for i in pred_inds): is_runnable = False - if index in pred.errored: - self.unrunnable[index].append(self.blocked.pop(index)) - if index in pred.unrunnable: - self.unrunnable[index].extend(pred.unrunnable[index]) - self.blocked.pop(index) + blocked = True + if pred_errored := [i for i in pred_inds if i in pred.errored]: + self.unrunnable[index].extend( + [pred.errored[i] for i in pred_errored] + ) + blocked = False + if pred_unrunnable := [ + i for i in pred_inds if i in pred.unrunnable + ]: + self.unrunnable[index].extend( + [pred.unrunnable[i] for i in pred_unrunnable] + ) + blocked = False + if not blocked: + del self.blocked[index] break if is_runnable: runnable.append(self.blocked.pop(index)) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 20b410026e..8b1407d231 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -1,20 +1,20 @@ from pathlib import Path import typing as ty -import os -from unittest.mock import Mock import time import pytest from fileformats.generic import File from pydra.engine.specs import ( Runtime, Result, + WorkflowDef, ) from pydra.engine.lazy import ( LazyInField, LazyOutField, ) from pydra.engine.core import Workflow -from pydra.engine.submitter import NodeExecution +from pydra.engine.node import Node +from pydra.engine.submitter import Submitter, NodeExecution, DiGraph from pydra.utils.typing import StateArray from pydra.design import python, workflow from .utils import Foo, FunAddTwo, FunAddVar, ListSum @@ -29,12 +29,15 @@ def TestWorkflow(x: int, y: list[int]) -> int: @pytest.fixture -def workflow_task(): - return TestWorkflow(x=1, y=[1, 2, 3]) +def workflow_task(submitter: Submitter) -> WorkflowDef: + wf = TestWorkflow(x=1, y=[1, 2, 3]) + with submitter: + submitter(wf) + return wf @pytest.fixture -def workflow_obj(workflow_task): +def wf(workflow_task: WorkflowDef) -> Workflow: wf = Workflow.construct(workflow_task) for n in wf.nodes: if n._state: @@ -44,64 +47,63 @@ def workflow_obj(workflow_task): @pytest.fixture -def node_a(workflow_obj): - return workflow_obj["A"] +def submitter(tmp_path) -> Submitter: + return Submitter(tmp_path) @pytest.fixture -def node_b(workflow_obj): - return workflow_obj["B"] +def graph(wf: Workflow, submitter: Submitter) -> DiGraph[NodeExecution]: + return wf.execution_graph(submitter=submitter) @pytest.fixture -def node_c(workflow_obj): - return workflow_obj["C"] +def node_a(wf) -> Node: + return wf["A"] # we can pick any node to retrieve the values to -@pytest.fixture -def node_exec(node_c, workflow_task): - # We only use this to resolve the upstream outputs from, can be any node - return NodeExecution(node=node_c, workflow_inputs=workflow_task, submitter=Mock()) +def test_runtime(): + runtime = Runtime() + assert hasattr(runtime, "rss_peak_gb") + assert hasattr(runtime, "vms_peak_gb") + assert hasattr(runtime, "cpu_peak_percent") -def test_lazy_inp(workflow_obj, node_exec): - lf = LazyInField(field="x", type=int, 
workflow=workflow_obj) - assert lf._get_value(node_exec) == 1 +def test_result(tmp_path): + result = Result(output_dir=tmp_path) + assert hasattr(result, "runtime") + assert hasattr(result, "outputs") + assert hasattr(result, "errored") + assert getattr(result, "errored") is False - lf = LazyInField(field="y", type=str, workflow=workflow_obj) - assert lf._get_value(node_exec) == [1, 2, 3] +def test_lazy_inp(wf: Workflow, graph: DiGraph[NodeExecution]): + lf = LazyInField(field="x", type=int, workflow=wf) + assert lf._get_value(workflow=wf, graph=graph) == 1 -def test_lazy_out(node_a, node_exec): - lf = LazyOutField(field="a", type=int, node=node_a) - assert lf._get_value(node_exec) == 3 + lf = LazyInField(field="y", type=str, workflow=wf) + assert lf._get_value(workflow=wf, graph=graph) == [1, 2, 3] -def test_lazy_getvale(): - tn = NodeTesting() - lf = LazyIn(task=tn) - with pytest.raises(Exception) as excinfo: - lf.inp_c - assert ( - str(excinfo.value) - == "Task 'tn' has no input attribute 'inp_c', available: 'inp_a', 'inp_b'" - ) +def test_lazy_out(node_a, wf, graph): + lf = LazyOutField(field="out", type=int, node=node_a) + assert lf._get_value(wf, graph) == 3 def test_input_file_hash_1(tmp_path): - os.chdir(tmp_path) - outfile = "test.file" - fields = [("in_file", ty.Any)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseDef,)) - inputs = make_klass(input_spec) - assert inputs(in_file=outfile).hash == "9a106eb2830850834d9b5bf098d5fa85" + + outfile = tmp_path / "test.file" + outfile.touch() + + @python.define + def A(in_file: File) -> File: + return in_file + + assert A(in_file=outfile)._hash == "9644d3998748b339819c23ec6abec520" with open(outfile, "w") as fp: fp.write("test") - fields = [("in_file", File)] - input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseDef,)) - inputs = make_klass(input_spec) - assert inputs(in_file=outfile).hash == "02fa5f6f1bbde7f25349f54335e1adaf" + + assert A(in_file=outfile)._hash == "9f7f9377ddef6d8c018f1bf8e89c208c" def test_input_file_hash_2(tmp_path): @@ -110,26 +112,26 @@ def test_input_file_hash_2(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo(name="Inputs", fields=[("in_file", File)], bases=(BaseDef,)) - inputs = make_klass(input_spec) + @python.define + def A(in_file: File) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=file).hash - assert hash1 == "aaa50d60ed33d3a316d58edc882a34c3" + hash1 = A(in_file=file)._hash + assert hash1 == "179bd3cbdc747edc4957579376fe8c7d" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=file_diffname).hash + hash2 = A(in_file=file_diffname)._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - time.sleep(2) # ensure mtime is different file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=file_diffcontent).hash + hash3 = A(in_file=file_diffcontent)._hash assert hash1 != hash3 @@ -139,33 +141,31 @@ def test_input_file_hash_2a(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", fields=[("in_file", ty.Union[File, int])], bases=(BaseDef,) - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.Union[File, int]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=file).hash - assert hash1 == 
"aaa50d60ed33d3a316d58edc882a34c3" + hash1 = A(in_file=file)._hash + assert hash1 == "179bd3cbdc747edc4957579376fe8c7d" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=file_diffname).hash + hash2 = A(in_file=file_diffname)._hash assert hash1 == hash2 + # checking if string is also accepted + hash3 = A(in_file=str(file))._hash + assert hash3 == hash1 + # checking if different content (the same name) affects the hash - time.sleep(2) # ensure mtime is different file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=file_diffcontent).hash - assert hash1 != hash3 - - # checking if string is also accepted - hash4 = inputs(in_file=str(file)).hash - assert hash4 == "800af2b5b334c9e3e5c40c0e49b7ffb5" + hash4 = A(in_file=file_diffcontent)._hash + assert hash1 != hash4 def test_input_file_hash_3(tmp_path): @@ -174,22 +174,21 @@ def test_input_file_hash_3(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", fields=[("in_file", File), ("in_int", int)], bases=(BaseDef,) - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: File, in_int: int) -> File: + return in_file, in_int - my_inp = inputs(in_file=file, in_int=3) + a = A(in_file=file, in_int=3) # original hash and files_hash (dictionary contains info about files) - hash1 = my_inp.hash + hash1 = a._hash # files_hash1 = deepcopy(my_inp.files_hash) # file name should be in files_hash1[in_file] filename = str(Path(file)) # assert filename in files_hash1["in_file"] # changing int input - my_inp.in_int = 5 - hash2 = my_inp.hash + a.in_int = 5 + hash2 = a._hash # files_hash2 = deepcopy(my_inp.files_hash) # hash should be different assert hash1 != hash2 @@ -202,7 +201,7 @@ def test_input_file_hash_3(tmp_path): with open(file, "w") as f: f.write("hello") - hash3 = my_inp.hash + hash3 = a._hash # files_hash3 = deepcopy(my_inp.files_hash) # hash should be the same, # but the entry for in_file in files_hash should be different (modification time) @@ -214,11 +213,11 @@ def test_input_file_hash_3(tmp_path): # assert files_hash3["in_file"][filename][1] == files_hash2["in_file"][filename][1] # setting the in_file again - my_inp.in_file = file + a.in_file = file # filename should be removed from files_hash # assert my_inp.files_hash["in_file"] == {} # will be saved again when hash is calculated - assert my_inp.hash == hash3 + assert a._hash == hash3 # assert filename in my_inp.files_hash["in_file"] @@ -230,26 +229,23 @@ def test_input_file_hash_4(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", - fields=[("in_file", ty.List[ty.List[ty.Union[int, File]]])], - bases=(BaseDef,), - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.List[ty.List[ty.Union[int, File]]]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=[[file, 3]]).hash - assert hash1 == "0693adbfac9f675af87e900065b1de00" + hash1 = A(in_file=[[file, 3]])._hash + assert hash1 == "ffd7afe0ca9d4585518809a509244b4b" # the same file, but int field changes - hash1a = inputs(in_file=[[file, 5]]).hash + hash1a = A(in_file=[[file, 5]])._hash assert hash1 != hash1a # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=[[file_diffname, 3]]).hash + 
hash2 = A(in_file=[[file_diffname, 3]])._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash @@ -257,7 +253,7 @@ def test_input_file_hash_4(tmp_path): file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=[[file_diffcontent, 3]]).hash + hash3 = A(in_file=[[file_diffcontent, 3]])._hash assert hash1 != hash3 @@ -267,26 +263,23 @@ def test_input_file_hash_5(tmp_path): with open(file, "w") as f: f.write("hello") - input_spec = SpecInfo( - name="Inputs", - fields=[("in_file", ty.List[ty.Dict[ty.Any, ty.Union[File, int]]])], - bases=(BaseDef,), - ) - inputs = make_klass(input_spec) + @python.define + def A(in_file: ty.List[ty.Dict[ty.Any, ty.Union[File, int]]]) -> File: + return in_file # checking specific hash value - hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash - assert hash1 == "56e6e2c9f3bdf0cd5bd3060046dea480" + hash1 = A(in_file=[{"file": file, "int": 3}])._hash + assert hash1 == "ba884a74e33552854271f55b03e53947" # the same file, but int field changes - hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash + hash1a = A(in_file=[{"file": file, "int": 5}])._hash assert hash1 != hash1a # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") - hash2 = inputs(in_file=[{"file": file_diffname, "int": 3}]).hash + hash2 = A(in_file=[{"file": file_diffname, "int": 3}])._hash assert hash1 == hash2 # checking if different content (the same name) affects the hash @@ -294,53 +287,15 @@ def test_input_file_hash_5(tmp_path): file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") - hash3 = inputs(in_file=[{"file": file_diffcontent, "int": 3}]).hash + hash3 = A(in_file=[{"file": file_diffcontent, "int": 3}])._hash assert hash1 != hash3 -def test_lazy_field_cast(): - task = Foo(a="a", b=1, c=2.0, name="foo") - - assert task.lzout.y._type is int - assert workflow.cast(task.lzout.y, float)._type is float +def test_lazy_field_cast(wf: Workflow): + lzout = wf.add(Foo(a="a", b=1, c=2.0), name="foo") - -def test_lazy_field_multi_same_split(): - @python.define - def f(x: ty.List[int]) -> ty.List[int]: - return x - - task = f(x=[1, 2, 3], name="foo") - - lf = task.lzout.out.split("foo.x") - - assert lf.type == StateArray[int] - assert lf.splits == set([(("foo.x",),)]) - - lf2 = lf.split("foo.x") - assert lf2.type == StateArray[int] - assert lf2.splits == set([(("foo.x",),)]) - - -def test_lazy_field_multi_diff_split(): - @python.define - def F(x: ty.Any, y: ty.Any) -> ty.Any: - return x - - task = F(x=[1, 2, 3], name="foo") - - lf = task.lzout.out.split("foo.x") - - assert lf.type == StateArray[ty.Any] - assert lf.splits == set([(("foo.x",),)]) - - lf2 = lf.split("foo.x") - assert lf2.type == StateArray[ty.Any] - assert lf2.splits == set([(("foo.x",),)]) - - lf3 = lf.split("foo.y") - assert lf3.type == StateArray[StateArray[ty.Any]] - assert lf3.splits == set([(("foo.x",),), (("foo.y",),)]) + assert lzout.y._type is int + assert workflow.cast(lzout.y, float)._type is float def test_wf_lzin_split(tmp_path): @@ -362,18 +317,3 @@ def Outer(xs): outputs = outer(cache_dir=tmp_path) assert outputs.out == [1, 2, 3] - - -def test_runtime(): - runtime = Runtime() - assert hasattr(runtime, "rss_peak_gb") - assert hasattr(runtime, "vms_peak_gb") - assert hasattr(runtime, "cpu_peak_percent") - - -def test_result(tmp_path): - result = Result(output_dir=tmp_path) - assert 
hasattr(result, "runtime") - assert hasattr(result, "outputs") - assert hasattr(result, "errored") - assert getattr(result, "errored") is False From 33bb999bc1eb610bab84c9911ed7d3d25b4eae95 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 17:38:00 +1100 Subject: [PATCH 283/342] debugging test_specs --- pydra/engine/tests/test_specs.py | 1 - pydra/engine/tests/test_submitter.py | 479 +++++++++++++-------------- 2 files changed, 233 insertions(+), 247 deletions(-) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 8b1407d231..f607b1a67e 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -15,7 +15,6 @@ from pydra.engine.core import Workflow from pydra.engine.node import Node from pydra.engine.submitter import Submitter, NodeExecution, DiGraph -from pydra.utils.typing import StateArray from pydra.design import python, workflow from .utils import Foo, FunAddTwo, FunAddVar, ListSum diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 909d1c0649..33f72d7383 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -5,10 +5,10 @@ import time import attrs import typing as ty -from random import randint import os from unittest.mock import patch import pytest +from pydra.design import workflow from fileformats.generic import Directory from .utils import ( need_sge, @@ -23,65 +23,67 @@ from pydra.design import python from pathlib import Path from datetime import datetime +from pydra.engine.specs import Result @python.define -def sleep_add_one(x): +def SleepAddOne(x): time.sleep(1) return x + 1 def test_callable_wf(plugin, tmpdir): - wf = BasicWorkflow() - res = wf() - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(cache_dir=tmpdir) + assert outputs.out == 9 + del wf, outputs # providing plugin - wf = BasicWorkflow() - res = wf(plugin="cf") - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(worker="cf") + assert outputs.out == 9 + del wf, outputs # providing plugin_kwargs - wf = BasicWorkflow() - res = wf(plugin="cf", plugin_kwargs={"n_procs": 2}) - assert res.output.out == 9 - del wf, res + wf = BasicWorkflow(x=5) + outputs = wf(worker="cf", n_procs=2) + assert outputs.out == 9 + del wf, outputs # providing wrong plugin_kwargs - wf = BasicWorkflow() + wf = BasicWorkflow(x=5) with pytest.raises(TypeError, match="an unexpected keyword argument"): - wf(plugin="cf", plugin_kwargs={"sbatch_args": "-N2"}) + wf(worker="cf", sbatch_args="-N2") # providing submitter - wf = BasicWorkflow() - wf.cache_dir = tmpdir - sub = Submitter(plugin) - res = wf(submitter=sub) - assert res.output.out == 9 + wf = BasicWorkflow(x=5) + + with Submitter(workflow=plugin, cache_dir=tmpdir) as sub: + res = sub(wf) + assert res.outputs.out == 9 def test_concurrent_wf(plugin, tmpdir): # concurrent workflow # A --> C # B --> D - wf = Workflow("new_wf", input_spec=["x", "y"]) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = 
workflow.add(SleepAddOne(x=y), name="taskb") + taskc = workflow.add(SleepAddOne(x=taska.out), name="taskc") + taskd = workflow.add(SleepAddOne(x=taskb.out), name="taskd") + return taskc.out, taskd.out + + wf = Workflow(x=5, y=10) - res = wf.result() - assert res.output.out1 == 7 - assert res.output.out2 == 12 + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert not results.errored, " ".join(results.errors["error message"]) + outputs = results.outputs + assert outputs.out1 == 7 + assert outputs.out2 == 12 def test_concurrent_wf_nprocs(tmpdir): @@ -89,49 +91,49 @@ def test_concurrent_wf_nprocs(tmpdir): # setting n_procs in Submitter that is passed to the worker # A --> C # B --> D - wf = Workflow("new_wf", input_spec=["x", "y"]) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter("cf", n_procs=2) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = workflow.add(SleepAddOne(x=y), name="taskb") + taskc = workflow.add(SleepAddOne(x=taska.out), name="taskc") + taskd = workflow.add(SleepAddOne(x=taskb.out), name="taskd") + return taskc.out, taskd.out - res = wf.result() - assert res.output.out1 == 7 - assert res.output.out2 == 12 + wf = Workflow(x=5, y=10) + with Submitter(worker="cf", n_procs=2, cache_dir=tmpdir) as sub: + res = sub(wf) + + assert not res.errored, " ".join(res.errors["error message"]) + outputs = res.outputs + assert outputs.out1 == 7 + assert outputs.out2 == 12 def test_wf_in_wf(plugin, tmpdir): """WF(A --> SUBWF(A --> B) --> B)""" - wf = Workflow(name="wf_in_wf", input_spec=["x"]) - wf.inputs.x = 3 - wf.add(sleep_add_one(name="wf_a", x=wf.lzin.x)) # workflow task - subwf = Workflow(name="sub_wf", input_spec=["x"]) - subwf.add(sleep_add_one(name="sub_a", x=subwf.lzin.x)) - subwf.add(sleep_add_one(name="sub_b", x=subwf.sub_a.lzout.out)) - subwf.set_output([("out", subwf.sub_b.lzout.out)]) - # connect, then add - subwf.inputs.x = wf.wf_a.lzout.out - subwf.cache_dir = tmpdir - - wf.add(subwf) - wf.add(sleep_add_one(name="wf_b", x=wf.sub_wf.lzout.out)) - wf.set_output([("out", wf.wf_b.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(plugin) as sub: - sub(wf) + @workflow.define + def SubWf(x): + sub_a = workflow.add(SleepAddOne(x=x), name="sub_a") + sub_b = workflow.add(SleepAddOne(x=sub_a.out), name="sub_b") + return sub_b.out + + @workflow.define + def WfInWf(x): + a = workflow.add(SleepAddOne(x=x), name="a") + subwf = workflow.add(SubWf(x=a.out), name="subwf") + b = workflow.add(SleepAddOne(x=subwf.out), name="b") + return b.out + + wf = WfInWf(x=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - res = wf.result() - assert res.output.out == 7 + assert not results.errored, " ".join(results.errors["error message"]) + outputs = results.outputs + assert outputs.out == 7 @pytest.mark.flaky(reruns=2) # when dask @@ -139,60 +141,59 @@ def test_wf2(plugin_dask_opt, tmpdir): """workflow as a node workflow-node with one task and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(sleep_add_one(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - 
wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(SleepAddOne(x=x)) + return add2.out - with Submitter(worker=plugin_dask_opt) as sub: - sub(wf) + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + wf = Workflow(x=2) + + with Submitter(worker=plugin_dask_opt, cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == 3 + assert res.outputs.out == 3 @pytest.mark.flaky(reruns=2) # when dask def test_wf_with_state(plugin_dask_opt, tmpdir): - wf = Workflow(name="wf_with_state", input_spec=["x"]) - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.taska.lzout.out)) + @workflow.define + def Workflow(x): + taska = workflow.add(SleepAddOne(x=x), name="taska") + taskb = workflow.add(SleepAddOne(x=taska.out), name="taskb") + return taskb.out - wf.split("x", x=[1, 2, 3]) - wf.set_output([("out", wf.taskb.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(worker=plugin_dask_opt) as sub: - sub(wf) + wf = Workflow().split(x=[1, 2, 3]) - res = wf.result() + with Submitter(cache_dir=tmpdir, worker=plugin_dask_opt) as sub: + res = sub(wf) - assert res[0].output.out == 3 - assert res[1].output.out == 4 - assert res[2].output.out == 5 + assert res.outputs.out[0] == 3 + assert res.outputs.out[1] == 4 + assert res.outputs.out[2] == 5 -def test_serial_wf(): +def test_debug_wf(): # Use serial plugin to execute workflow instead of CF - wf = BasicWorkflow() - res = wf(plugin="serial") - assert res.output.out == 9 + wf = BasicWorkflow(x=5) + outputs = wf(worker="debug") + assert outputs.out == 9 @need_slurm def test_slurm_wf(tmpdir): - wf = BasicWorkflow() - wf.cache_dir = tmpdir + wf = BasicWorkflow(x=1) # submit workflow and every task as slurm job - with Submitter("slurm") as sub: - sub(wf) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == 9 + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() # ensure each task was executed with slurm @@ -202,13 +203,11 @@ def test_slurm_wf(tmpdir): @need_slurm def test_slurm_wf_cf(tmpdir): # submit entire workflow as single job executing with cf worker - wf = BasicWorkflow() - wf.cache_dir = tmpdir - wf.plugin = "cf" - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 9 + wf = BasicWorkflow(x=1) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() # ensure only workflow was executed with slurm @@ -220,14 +219,12 @@ def test_slurm_wf_cf(tmpdir): @need_slurm def test_slurm_wf_state(tmpdir): - wf = BasicWorkflow() - wf.split("x", x=[5, 6]) - wf.cache_dir = tmpdir - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res[0].output.out == 9 - assert res[1].output.out == 10 + wf = BasicWorkflow(x=1).split(x=[5, 6]) + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + + assert res.outputs.out[0] == 9 + assert res.outputs.out[1] == 10 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -237,16 +234,18 @@ def test_slurm_wf_state(tmpdir): @need_slurm @pytest.mark.flaky(reruns=3) def 
test_slurm_max_jobs(tmpdir): - wf = Workflow("new_wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.inputs.x = 5 - wf.inputs.y = 10 - wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) - wf.add(sleep_add_one(name="taskb", x=wf.lzin.y)) - wf.add(sleep_add_one(name="taskc", x=wf.taska.lzout.out)) - wf.add(sleep_add_one(name="taskd", x=wf.taskb.lzout.out)) - wf.set_output([("out1", wf.taskc.lzout.out), ("out2", wf.taskd.lzout.out)]) - with Submitter("slurm", max_jobs=1) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + taska = workflow.add(SleepAddOne(x=x)) + taskb = workflow.add(SleepAddOne(x=y)) + taskc = workflow.add(SleepAddOne(x=taska.out)) + taskd = workflow.add(SleepAddOne(x=taskb.out)) + return taskc.out, taskd.out + + wf = Workflow(x=5, y=10) + + with Submitter(worker="slurm", cache_dir=tmpdir, max_jobs=1) as sub: + res = sub(wf) jobids = [] time.sleep(0.5) # allow time for sacct to collect itself @@ -277,14 +276,12 @@ def test_slurm_max_jobs(tmpdir): @need_slurm def test_slurm_args_1(tmpdir): """testing sbatch_args provided to the submitter""" - task = sleep_add_one(x=1) - task.cache_dir = tmpdir + task = SleepAddOne(x=1) # submit workflow and every task as slurm job - with Submitter("slurm", sbatch_args="-N1") as sub: - sub(task) + with Submitter(worker="slurm", cache_dir=tmpdir, sbatch_args="-N1") as sub: + res = sub(task) - res = task.result() - assert res.output.out == 2 + assert res.outputs.out == 2 script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() @@ -294,11 +291,12 @@ def test_slurm_args_2(tmpdir): """testing sbatch_args provided to the submitter exception should be raised for invalid options """ - task = sleep_add_one(x=1) - task.cache_dir = tmpdir + task = SleepAddOne(x=1) # submit workflow and every task as slurm job with pytest.raises(RuntimeError, match="Error returned from sbatch:"): - with Submitter("slurm", sbatch_args="-N1 --invalid") as sub: + with Submitter( + worker="slurm", cache_dir=tmpdir, sbatch_args="-N1 --invalid" + ) as sub: sub(task) @@ -347,26 +345,23 @@ def test_slurm_cancel_rerun_1(tmpdir): The first job should be re-queue and finish without problem. 
(possibly has to be improved, in theory cancel job might finish before cancel) """ - wf = Workflow( - name="wf", - input_spec=["x", "job_name_cancel", "job_name_resqueue"], - cache_dir=tmpdir, - ) - wf.add(sleep(name="sleep1", x=wf.lzin.x, job_name_part=wf.lzin.job_name_cancel)) - wf.add(cancel(name="cancel1", job_name_part=wf.lzin.job_name_resqueue)) - wf.inputs.x = 10 - wf.inputs.job_name_resqueue = "sleep1" - wf.inputs.job_name_cancel = "cancel1" - - wf.set_output([("out", wf.sleep1.lzout.out), ("canc_out", wf.cancel1.lzout.out)]) - with Submitter("slurm") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 10 + @workflow.define(outputs=["out", "canc_out"]) + def Workflow(x, job_name_cancel, job_name_resqueue): + sleep1 = workflow.add(sleep(x=x, job_name_part=job_name_cancel)) + cancel1 = workflow.add(cancel(job_name_part=job_name_resqueue)) + return sleep1.out, cancel1.out + + wf = Workflow(x=10, job_name_resqueue="sleep1", job_name_cancel="cancel1") + + with Submitter(worker="slurm", cache_dir=tmpdir) as sub: + res = sub(wf) + + outputs = res.outputs + assert outputs.out == 10 # checking if indeed the sleep-task job was cancelled by cancel-task - assert "Terminating" in res.output.canc_out - assert "Invalid" not in res.output.canc_out + assert "Terminating" in outputs.canc_out + assert "Invalid" not in outputs.canc_out script_dir = tmpdir / "SlurmWorker_scripts" assert script_dir.exists() @@ -379,32 +374,32 @@ def test_slurm_cancel_rerun_2(tmpdir): job_id of the first task and cancel it. The first job is not able t be rescheduled and the error is returned. """ - wf = Workflow(name="wf", input_spec=["x", "job_name"], cache_dir=tmpdir) - wf.add(sleep(name="sleep2", x=wf.lzin.x)) - wf.add(cancel(name="cancel2", job_name_part=wf.lzin.job_name)) - wf.inputs.x = 10 - wf.inputs.job_name = "sleep2" + @workflow.define(outputs=["out", "canc_out"]) + def Workflow(x, job_name): + sleep2 = workflow.add(sleep(x=x)) + cancel2 = workflow.add(cancel(job_name_part=job_name)) + return sleep2.out, cancel2.out + + wf = Workflow(x=10, job_name="sleep2") - wf.set_output([("out", wf.sleep2.lzout.out), ("canc_out", wf.cancel2.lzout.out)]) with pytest.raises(Exception): - with Submitter("slurm", sbatch_args="--no-requeue") as sub: + with Submitter( + worker="slurm", cache_dir=tmpdir, sbatch_args="--no-requeue" + ) as sub: sub(wf) @need_sge def test_sge_wf(tmpdir): """testing that a basic workflow can be run with the SGEWorker""" - wf = BasicWorkflow() - wf.cache_dir = tmpdir + wf = BasicWorkflow(x=1) # submit workflow and every task as sge job - with Submitter( - "sge", - ) as sub: - sub(wf) + with Submitter(worker="sge", cache_dir=tmpdir) as sub: + res = sub(wf) - res = wf.result() - assert res.output.out == 9 + outputs = res.outputs + assert outputs.out == 9 script_dir = tmpdir / "SGEWorker_scripts" assert script_dir.exists() # ensure each task was executed with sge @@ -412,18 +407,16 @@ def test_sge_wf(tmpdir): @need_sge -def test_sge_wf_cf(tmpdir): +def test_sge_wf_cf(tmp_path): """testing the SGEWorker can submit SGE tasks while the workflow uses the concurrent futures plugin""" # submit entire workflow as single job executing with cf worker - wf = BasicWorkflow() - wf.cache_dir = tmpdir - wf.plugin = "cf" - with Submitter("sge") as sub: - sub(wf) - res = wf.result() - assert res.output.out == 9 - script_dir = tmpdir / "SGEWorker_scripts" + wf = BasicWorkflow(x=1) + with Submitter(worker="sge", cache_dir=tmp_path) as sub: + res = sub(wf) + outputs = res.outputs + assert outputs.out == 9 + 
script_dir = tmp_path / "SGEWorker_scripts" assert script_dir.exists() # ensure only workflow was executed with slurm sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -435,15 +428,11 @@ def test_sge_wf_cf(tmpdir): @need_sge def test_sge_wf_state(tmpdir): """testing the SGEWorker can be used with a workflow with state""" - wf = BasicWorkflow() - wf.split("x") - wf.inputs.x = [5, 6] - wf.cache_dir = tmpdir - with Submitter("sge") as sub: - sub(wf) - res = wf.result() - assert res[0].output.out == 9 - assert res[1].output.out == 10 + wf = BasicWorkflow().split(x=[5, 6]) + with Submitter(worker="sge", cache_dir=tmpdir) as sub: + res = sub(wf) + assert res.output.out[0] == 9 + assert res.output.out[1] == 10 script_dir = tmpdir / "SGEWorker_scripts" assert script_dir.exists() sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] @@ -469,12 +458,10 @@ def qacct_output_to_dict(qacct_output): def test_sge_set_threadcount(tmpdir): """testing the number of threads for an SGEWorker task can be set using the input_spec variable sgeThreads""" - wf = BasicWorkflowWithThreadCount() - wf.inputs.x = 5 - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCount(x=5) jobids = [] - with Submitter("sge") as sub: + with Submitter(worker="sge", cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -499,13 +486,10 @@ def test_sge_set_threadcount(tmpdir): def test_sge_limit_maxthreads(tmpdir): """testing the ability to limit the number of threads used by the SGE at one time with the max_threads argument to SGEWorker""" - wf = BasicWorkflowWithThreadCountConcurrent() - wf.inputs.x = [5, 6] - wf.split("x") - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCountConcurrent().split(x=[5, 6]) jobids = [] - with Submitter("sge", max_threads=8) as sub: + with Submitter(worker="sge", max_threads=8, cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -543,13 +527,10 @@ def test_sge_limit_maxthreads(tmpdir): def test_sge_no_limit_maxthreads(tmpdir): """testing unlimited threads can be used at once by SGE when max_threads is not set""" - wf = BasicWorkflowWithThreadCountConcurrent() - wf.inputs.x = [5, 6] - wf.split("x") - wf.cache_dir = tmpdir + wf = BasicWorkflowWithThreadCountConcurrent().split(x=[5, 6]) jobids = [] - with Submitter("sge", max_threads=None) as sub: + with Submitter(worker="sge", max_threads=None, cache_dir=tmpdir) as sub: sub(wf) jobids = list(sub.worker.jobid_by_task_uid.values()) jobids.sort() @@ -587,7 +568,7 @@ def output_dir_as_input(out_dir: Directory) -> Directory: task = output_dir_as_input(out_dir=tmp_path) with pytest.raises(RuntimeError, match="Input field hashes have changed"): - task() + task(cache_dir=tmp_path) def test_hash_changes_in_task_inputs_unstable(tmp_path): @@ -605,22 +586,28 @@ def unstable_input(unstable: Unstable) -> int: task = unstable_input(unstable=Unstable(1)) with pytest.raises(RuntimeError, match="Input field hashes have changed"): - task() + task(cache_dir=tmp_path) def test_hash_changes_in_workflow_inputs(tmp_path): @python.define - def output_dir_as_output(out_dir: Path) -> Directory: + def OutputDirAsOutput(out_dir: Path) -> Directory: (out_dir / "new-file.txt").touch() return out_dir - wf = Workflow( - name="test_hash_change", input_spec={"in_dir": Directory}, in_dir=tmp_path - ) - wf.add(output_dir_as_output(out_dir=wf.lzin.in_dir, name="task")) - wf.set_output(("out_dir", wf.task.lzout.out)) - with pytest.raises(RuntimeError, match="Input field 
hashes have changed.*Workflow"): - wf() + @workflow.define(outputs=["out_dir"]) + def Workflow(in_dir: Directory): + task = workflow.add(OutputDirAsOutput(out_dir=in_dir), name="task") + return task.out + + in_dir = tmp_path / "in_dir" + in_dir.mkdir() + cache_dir = tmp_path / "cache_dir" + cache_dir.mkdir() + + wf = Workflow(in_dir=in_dir) + with pytest.raises(RuntimeError, match="Input field hashes have changed.*"): + wf(cache_dir=cache_dir) def test_hash_changes_in_workflow_graph(tmpdir): @@ -639,35 +626,33 @@ def __bytes_repr__(self, cache): hopefully cases like this will be very rare""" yield bytes(self.value) - @python.define - @mark.annotate({"return": {"x": X, "y": int}}) - def identity(x: X) -> ty.Tuple[X, int]: + @python.define(outputs=["x", "y"]) + def Identity(x: X) -> ty.Tuple[X, int]: return x, 99 @python.define - def alter_x(y): + def AlterX(y): X.value = 2 return y @python.define - def to_tuple(x, y): + def ToTuple(x, y): return (x, y) - wf = Workflow(name="wf_with_blocked_tasks", input_spec=["x", "y"]) - wf.add(identity(name="taska", x=wf.lzin.x)) - wf.add(alter_x(name="taskb", y=wf.taska.lzout.y)) - wf.add(to_tuple(name="taskc", x=wf.taska.lzout.x, y=wf.taskb.lzout.out)) - wf.set_output([("out", wf.taskc.lzout.out)]) - - wf.inputs.x = X() + @workflow.define + def Workflow(x): + taska = workflow.add(Identity(x=x)) + taskb = workflow.add(AlterX(y=taska.y)) + taskc = workflow.add(ToTuple(x=taska.x, y=taskb.out)) + return taskc.out - wf.cache_dir = tmpdir + wf = Workflow(x=X()) with pytest.raises( RuntimeError, match="Graph of 'wf_with_blocked_tasks' workflow is not empty" ): - with Submitter("cf") as sub: - result = sub(wf) + with Submitter(worker="cf", cache_dir=tmpdir) as sub: + sub(wf) @python.define @@ -684,36 +669,38 @@ def __init__(self, add_var, **kwargs): super().__init__(**kwargs) self.add_var = add_var - async def exec_serial(self, runnable, rerun=False, environment=None): - if isinstance(runnable, Task): - with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): - result = runnable._run(rerun, environment=environment) - return result - else: # it could be tuple that includes pickle files with tasks and inputs - return super().exec_serial(runnable, rerun, environment) + def run( + self, + task: "Task", + rerun: bool = False, + ) -> "Result": + with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): + return super().run(task, rerun) @python.define -def add_env_var_task(x: int) -> int: +def AddEnvVarTask(x: int) -> int: return x + int(os.environ.get("BYO_ADD_VAR", 0)) -def test_byo_worker(): +def test_byo_worker(tmp_path): - task1 = add_env_var_task(x=1) + task1 = AddEnvVarTask(x=1) - with Submitter(worker=BYOAddVarWorker, add_var=10) as sub: - assert sub.plugin == "byo_add_env_var" + with Submitter(worker=BYOAddVarWorker, add_var=10, cache_dir=tmp_path) as sub: + assert sub.worker_name == "byo_add_env_var" result = sub(task1) - assert outputs.out == 11 + assert result.outputs.out == 11 + + task2 = AddEnvVarTask(x=2) - task2 = add_env_var_task(x=2) + new_cache_dir = tmp_path / "new" - with Submitter(worker="serial") as sub: + with Submitter(worker="debug", cache_dir=new_cache_dir) as sub: result = sub(task2) - assert outputs.out == 2 + assert result.outputs.out == 2 def test_bad_builtin_worker(): From bb75d94ef11165521a55de7e30a4af8f7e30e9e1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 17:39:40 +1100 Subject: [PATCH 284/342] deleted workflow to pick up changes in workflow graph error message as it is no longer necessary --- 
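Note: the test updates in this and the neighbouring patches all apply the same mechanical translation from the old imperative Workflow(name=..., input_spec=...) / wf.result().output style to the new decorator-based API. A minimal sketch of the new pattern, assuming the pydra.design and Submitter interfaces exercised in the diffs above (AddOne and AddTwice are placeholder names, not part of the patches):

    from pydra.design import python, workflow
    from pydra.engine.submitter import Submitter

    @python.define
    def AddOne(x: int) -> int:
        return x + 1

    # Nodes are added inside a decorated constructor function and wired
    # together through their lazy ".out" attributes; the returned values
    # become the workflow outputs.
    @workflow.define
    def AddTwice(x: int):
        a = workflow.add(AddOne(x=x), name="a")
        b = workflow.add(AddOne(x=a.out), name="b")
        return b.out

    wf = AddTwice(x=1)
    # Submitter now takes worker= and cache_dir=; results are read from
    # res.outputs rather than wf.result().output.
    with Submitter(worker="debug", cache_dir="/tmp/pydra-cache") as sub:
        res = sub(wf)
    assert res.outputs.out == 3
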
pydra/engine/tests/test_submitter.py | 45 ---------------------------- 1 file changed, 45 deletions(-) diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index 33f72d7383..ddce586cd5 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -610,51 +610,6 @@ def Workflow(in_dir: Directory): wf(cache_dir=cache_dir) -def test_hash_changes_in_workflow_graph(tmpdir): - class X: - """Dummy class with unstable hash (i.e. which isn't altered in a node in which - it is an input)""" - - value = 1 - - def __bytes_repr__(self, cache): - """Bytes representation from class attribute, which will be changed be - 'alter_x" node. - - NB: this is a contrived example where the bytes_repr implementation returns - a bytes representation of a class attribute in order to trigger the exception, - hopefully cases like this will be very rare""" - yield bytes(self.value) - - @python.define(outputs=["x", "y"]) - def Identity(x: X) -> ty.Tuple[X, int]: - return x, 99 - - @python.define - def AlterX(y): - X.value = 2 - return y - - @python.define - def ToTuple(x, y): - return (x, y) - - @workflow.define - def Workflow(x): - taska = workflow.add(Identity(x=x)) - taskb = workflow.add(AlterX(y=taska.y)) - taskc = workflow.add(ToTuple(x=taska.x, y=taskb.out)) - return taskc.out - - wf = Workflow(x=X()) - - with pytest.raises( - RuntimeError, match="Graph of 'wf_with_blocked_tasks' workflow is not empty" - ): - with Submitter(worker="cf", cache_dir=tmpdir) as sub: - sub(wf) - - @python.define def to_tuple(x, y): return (x, y) From bda7ea2e17482116f28c0a0c834e4ccb7c04b220 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 18:15:21 +1100 Subject: [PATCH 285/342] debugging test_task --- pydra/engine/specs.py | 2 +- pydra/engine/submitter.py | 2 +- pydra/engine/tests/test_task.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index baea685f14..c4f8ebc439 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -260,7 +260,7 @@ def __call__( else: errors = result.errors raise RuntimeError( - f"Task {self} failed @ {errors['time of crash']} with following errors:\n" + f"Task {self} failed @ {errors['time of crash']} with the following errors:\n" + "\n".join(errors["error message"]) ) return result.outputs diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 3236306a57..90cb77cefa 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -256,7 +256,7 @@ def Split( e.add_note(msg) raise e else: - logger.error("\nTask execution failed\n" + msg) + logger.error("\nTask execution failed\n%s", msg) finally: self.run_start_time = None PersistentCache().clean_up() diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index fcdf1d246c..17dff77354 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -896,7 +896,7 @@ def TestFunc(a: int, b: float = 0.1): data = json.load(f) if "@type" in data: if "AssociatedWith" in data: - assert "TestFunc" in data["Label"] + assert "main" in data["Label"] if "@type" in data: if data["@type"] == "input": @@ -931,7 +931,7 @@ def test_audit_shellcommandtask(tmpdir): if "@type" in data: if "AssociatedWith" in data: - assert "shelly" in data["Label"] + assert "main" == data["Label"] if "@type" in data: if data["@type"] == "input": From dfbd989d870495268abe9374e5181f2f403d8cea Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 18:24:42 +1100 
Subject: [PATCH 286/342] fixed up remaining test_task tests --- pydra/engine/tests/test_task.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 17dff77354..153d0a3e9e 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1332,7 +1332,8 @@ def FunError(x): raise Exception("Error from the function") with pytest.raises(Exception, match="Error from the function") as exinfo: - FunError(x=3)(worker="cf", cache_dir=tmpdir) + with Submitter(worker="cf", cache_dir=tmpdir) as sub: + sub(FunError(x=3), raise_errors=True) # getting error file from the error message error_file_match = ( @@ -1364,8 +1365,9 @@ def Workflow(x_list): return fun_error.out wf = Workflow(x_list=[3, 4]) - with pytest.raises(Exception, match="Task 'fun_error' raised an error") as exinfo: - wf(worker="cf") + with pytest.raises(Exception, match="Task 'fun_error' raised an error.*") as exinfo: + with Submitter(worker="cf", cache_dir=tmpdir) as sub: + sub(wf, raise_errors=True) # getting error file from the error message error_file_match = ( From 85a3d95f9048bcf73306a4f49e73d15cabb1d682 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 18:39:22 +1100 Subject: [PATCH 287/342] fixed test_task_files --- pydra/engine/tests/test_tasks_files.py | 78 ++++++++++++-------------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index daf846b312..32d35cef07 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -5,22 +5,22 @@ import typing as ty from ..submitter import Submitter -from pydra.design import python +from pydra.design import python, workflow from fileformats.generic import File, Directory @python.define -def dir_count_file(dirpath): +def DirCountFile(dirpath: Directory) -> int: return len(os.listdir(dirpath)) @python.define -def dir_count_file_annot(dirpath: Directory): +def DirCountFileAnnot(dirpath: Directory) -> int: return len(os.listdir(dirpath)) @python.define -def file_add2(file): +def FileAdd2(file: File) -> File: array_inp = np.load(file) array_out = array_inp + 2 cwd = os.getcwd() @@ -31,7 +31,7 @@ def file_add2(file): @python.define -def file_mult(file): +def FileMult(file: File) -> File: array_inp = np.load(file) array_out = 10 * array_inp cwd = os.getcwd() @@ -41,7 +41,7 @@ def file_mult(file): @python.define -def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): +def FileAdd2Annot(file: File) -> File: array_inp = np.load(file) array_out = array_inp + 2 cwd = os.getcwd() @@ -52,7 +52,7 @@ def file_add2_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): @python.define -def file_mult_annot(file: File) -> ty.NamedTuple("Output", [("out", File)]): +def FileMultAnnot(file: File) -> File: array_inp = np.load(file) array_out = 10 * array_inp cwd = os.getcwd() @@ -68,36 +68,38 @@ def test_task_1(tmpdir): # creating abs path file = os.path.join(os.getcwd(), "arr1.npy") np.save(file, arr) - nn = file_add2(name="add2", file=file) + nn = FileAdd2(file=file) with Submitter(worker="cf") as sub: - sub(nn) + res = sub(nn) # checking the results - results = nn.result() - res = np.load(results.output.out) - assert res == np.array([4]) + + result = np.load(res.outputs.out) + assert result == np.array([4]) def test_wf_1(tmpdir): """workflow with 2 tasks that take file as an input and give file as an aoutput""" - wf = 
Workflow(name="wf_1", input_spec=["file_orig"]) - wf.add(file_add2(name="add2", file=wf.lzin.file_orig)) - wf.add(file_mult(name="mult", file=wf.add2.lzout.out)) - wf.set_output([("out", wf.mult.lzout.out)]) + + @workflow.define + def Workflow(file_orig: File): + add2 = workflow.add(FileAdd2(file=file_orig)) + mult = workflow.add(FileMult(file=add2.out)) + return mult.out os.chdir(tmpdir) arr = np.array([2, 3]) # creating abs path file_orig = os.path.join(os.getcwd(), "arr_orig.npy") np.save(file_orig, arr) - wf.inputs.file_orig = file_orig + wf = Workflow(file_orig=file_orig) with Submitter(worker="cf") as sub: - sub(wf) + res = sub(wf) - assert wf.output_dir.exists() - file_output = wf.result().output.out + assert res.output_dir.exists() + file_output = res.outputs.out assert Path(file_output).exists() # loading results array_out = np.load(file_output) @@ -111,15 +113,15 @@ def test_file_annotation_1(tmpdir): # creating abs path file = os.path.join(os.getcwd(), "arr1.npy") np.save(file, arr) - nn = file_add2_annot(name="add2", file=file) + nn = FileAdd2Annot(file=file) with Submitter(worker="cf") as sub: - sub(nn) + res = sub(nn) # checking the results - results = nn.result() - res = np.load(results.output.out) - assert res == np.array([4]) + assert res.errored is False, " ".join(res.errors["error message"]) + arr = np.load(res.outputs.out) + assert arr == np.array([4]) def test_broken_file(tmpdir): @@ -127,13 +129,12 @@ def test_broken_file(tmpdir): os.chdir(tmpdir) file = os.path.join(os.getcwd(), "non_existent.npy") - nn = file_add2(name="add2", file=file) with pytest.raises(FileNotFoundError): with Submitter(worker="cf") as sub: - sub(nn) + sub(FileAdd2(file=file)) with pytest.raises(FileNotFoundError, match="do not exist"): - file_add2_annot(name="add2_annot", file=file) + FileAdd2Annot(file=file) def test_broken_file_link(tmpdir): @@ -149,31 +150,27 @@ def test_broken_file_link(tmpdir): os.symlink(file, file_link) os.remove(file) - nn = file_add2(name="add2", file=file_link) # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): with Submitter(worker="cf") as sub: - sub(nn) + sub(FileAdd2(file=file_link)) with pytest.raises(FileNotFoundError, match="do not exist"): - file_add2_annot(name="add2_annot", file=file_link) + FileAdd2Annot(file=file_link) def test_broken_dir(): """Test how broken directories are handled during hashing""" - # dirpath doesn't exist - nn = dir_count_file(name="listdir", dirpath="/broken_dir_path/") - # raises error inside task # unless variable is defined as a File pydra will treat it as a string with pytest.raises(FileNotFoundError): with Submitter(worker="cf") as sub: - sub(nn) + sub(DirCountFile(dirpath="/broken_dir_path/")) # raises error before task is run with pytest.raises(FileNotFoundError): - dir_count_file_annot(name="listdir", dirpath="/broken_dir_path/") + DirCountFileAnnot(dirpath="/broken_dir_path/") def test_broken_dir_link1(tmpdir): @@ -187,14 +184,13 @@ def test_broken_dir_link1(tmpdir): os.symlink(dir1, dir1_link) os.rmdir(dir1) - nn = dir_count_file(name="listdir", dirpath=Path(dir1)) # raises error while running task with pytest.raises(FileNotFoundError): with Submitter(worker="cf") as sub: - sub(nn) + sub(DirCountFile(dirpath=Path(dir1))) with pytest.raises(FileNotFoundError): - dir_count_file_annot(name="listdir", dirpath=Path(dir1)) + DirCountFileAnnot(dirpath=Path(dir1)) def test_broken_dir_link2(tmpdir): @@ -210,11 +206,11 @@ def 
test_broken_dir_link2(tmpdir): os.symlink(file1, file1_link) os.remove(file1) # file1_link is broken - nn = dir_count_file(name="listdir", dirpath=dir2) + nn = DirCountFile(dirpath=dir2) # does not raises error because pydra treats dirpath as a string with Submitter(worker="cf") as sub: sub(nn) - nn2 = dir_count_file_annot(name="listdir", dirpath=str(dir2)) + nn2 = DirCountFileAnnot(dirpath=str(dir2)) with Submitter(worker="cf") as sub: sub(nn2) From bf22f7854dd0d7d2fb30793f5b98317b44be27a5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 26 Feb 2025 19:31:00 +1100 Subject: [PATCH 288/342] started debugging test_node_tasks --- pydra/engine/core.py | 3 +- pydra/engine/tests/test_node_task.py | 278 +++++++++++++-------------- 2 files changed, 136 insertions(+), 145 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 9b0ac65d79..b2d9e5324d 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -735,8 +735,7 @@ def under_construction(cls) -> "Workflow[ty.Any]": # Find the frame where the construct method was called if ( frame.f_code.co_name == "construct" - and "cls" in frame.f_locals - and frame.f_locals["cls"] is cls + and frame.f_locals.get("cls") is cls and "workflow" in frame.f_locals ): return frame.f_locals["workflow"] # local var "workflow" in construct diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 51316324fd..499acea942 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -9,7 +9,7 @@ import pytest import time from fileformats.generic import File -from pydra.design import python +from pydra.design import python, workflow from .utils import ( FunAddTwo, @@ -24,9 +24,35 @@ Op4Var, ) -from ..core import Task +from pydra.engine.core import Task +from pydra.engine.specs import TaskDef +from pydra.engine.state import State from pydra.utils.typing import StateArray -from ..submitter import Submitter +from pydra.engine.submitter import Submitter +from pydra.engine.core import Workflow + + +@workflow.define +def IdentityWorkflow(a: int) -> int: + + @python.define + def Identity(a): + return a + + a = workflow.add(Identity(a=a)) + return a.out + + +def get_state(task: TaskDef) -> State: + """helper function to get the state of the task once it has been added to workflow""" + identity_workflow = IdentityWorkflow(a=1) + wf = Workflow.construct(identity_workflow) + wf.add(task, name="NA") + node = wf["NA"] + if node.state: + node.state.prepare_states() + node.state.prepare_inputs() + return node.state @pytest.fixture(scope="module") @@ -58,7 +84,7 @@ def test_task_init_1a(): def test_task_init_2(): """task with a name and inputs""" - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) # adding NA to the name of the variable assert getattr(nn.inputs, "a") == 3 assert nn.state is None @@ -77,15 +103,15 @@ def test_task_init_3( if input_type == "array": a_in = np.array(a_in) - nn = FunAddTwo().split(splitter=splitter, a=a_in) + nn = FunAddTwo().split(splitter, a=a_in) - assert np.allclose(nn.inputs.a, [3, 5]) - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn + assert np.allclose(nn.a, [3, 5]) + state = get_state(nn) + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == states_ind - assert nn.state.states_val == states_val + assert state.states_ind == states_ind + assert state.states_val == states_val @pytest.mark.parametrize( @@ -127,7 
+153,7 @@ def test_task_init_3a( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = FunAddVar(name="NA").split(splitter=splitter, a=a_in, b=b_in) + nn = FunAddVar().split(splitter, a=a_in, b=b_in) assert np.allclose(nn.inputs.a, [3, 5]) assert np.allclose(nn.inputs.b, [10, 20]) @@ -141,8 +167,8 @@ def test_task_init_3a( def test_task_init_4(): """task with interface splitter and inputs set in the split method""" - nn = FunAddTwo(name="NA") - nn.split(splitter="a", a=[3, 5]) + nn = FunAddTwo() + nn.split("a", a=[3, 5]) assert np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == "NA.a" @@ -155,9 +181,9 @@ def test_task_init_4(): def test_task_init_4b(): """updating splitter using overwrite=True""" - nn = FunAddTwo(name="NA") - nn.split(splitter="a", a=[1, 2]) - nn.split(splitter="a", a=[3, 5], overwrite=True) + nn = FunAddTwo() + nn.split("a", a=[1, 2]) + nn.split("a", a=[3, 5], overwrite=True) assert np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == "NA.a" @@ -170,9 +196,9 @@ def test_task_init_4b(): def test_task_init_4c(): """trying to set splitter twice without using overwrite""" - nn = FunAddVar(name="NA").split(splitter="b", b=[1, 2]) + nn = FunAddVar().split("b", b=[1, 2]) with pytest.raises(Exception) as excinfo: - nn.split(splitter="a", a=[3, 5]) + nn.split("a", a=[3, 5]) assert "splitter has been already set" in str(excinfo.value) assert nn.state.splitter == "NA.b" @@ -182,16 +208,14 @@ def test_task_init_4d(): """trying to set the same splitter twice without using overwrite if the splitter is the same, the exception shouldn't be raised """ - nn = FunAddTwo(name="NA").split(splitter="a", a=[3, 5]) - nn.split(splitter="a", a=[3, 5]) + nn = FunAddTwo().split("a", a=[3, 5]) + nn.split("a", a=[3, 5]) assert nn.state.splitter == "NA.a" def test_task_init_5(): """task with inputs, splitter and combiner""" - nn = ( - FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") assert nn.state.splitter == ["NA.a", "NA.b"] assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -219,9 +243,7 @@ def test_task_init_5(): def test_task_init_5a(): """updating combiner using overwrite=True""" - nn = ( - FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") nn.combine("a", overwrite=True) assert nn.state.splitter == ["NA.a", "NA.b"] @@ -250,9 +272,7 @@ def test_task_init_5a(): def test_task_init_5b(): """updating combiner without using overwrite""" - nn = ( - FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") with pytest.raises(Exception) as excinfo: nn.combine("a") assert "combiner has been already set" in str(excinfo.value) @@ -264,9 +284,7 @@ def test_task_init_5c(): """trying to set the same combiner twice without using overwrite if the combiner is the same, the exception shouldn't be raised """ - nn = ( - FunAddVar(name="NA").split(splitter=["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - ) + nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") nn.combine("b") assert nn.state.splitter == ["NA.a", "NA.b"] @@ -279,9 +297,9 @@ def test_task_init_5c(): def test_task_init_6(): """task with splitter, but the input is an empty list""" - nn = FunAddTwo(name="NA") - nn.split(splitter="a", a=[]) - assert nn.inputs.a == [] + nn 
= FunAddTwo() + nn.split("a", a=[]) + assert nn.a == [] assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -301,7 +319,7 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn1 = FunFileList(name="NA", filename_list=[file1, file2]) + nn1 = FunFileList(filename_list=[file1, file2]) output_dir1 = nn1.output_dir # changing the content of the file @@ -310,7 +328,7 @@ def test_task_init_7(tmp_path): with open(file2, "w") as f: f.write("from pydra") - nn2 = FunFileList(name="NA", filename_list=[file1, file2]) + nn2 = FunFileList(filename_list=[file1, file2]) output_dir2 = nn2.output_dir # the checksum should be different - content of file2 is different @@ -319,17 +337,17 @@ def test_task_init_7(tmp_path): def test_task_init_8(): """task without setting the input, the value should be set to attr.NOTHING""" - nn = FunAddTwo(name="NA") + nn = FunAddTwo() assert nn.inputs.a is attr.NOTHING def test_task_init_9(): """task without setting the input, but using the default avlue from function""" - nn1 = FunAddVarDefault(name="NA", a=2) - assert nn1.inputs.b == 1 + nn1 = FunAddVarDefault(a=2) + assert nn1.b == 1 - nn2 = FunAddVarDefault(name="NA", a=2, b=1) - assert nn2.inputs.b == 1 + nn2 = FunAddVarDefault(a=2, b=1) + assert nn2.b == 1 # both tasks should have the same checksum assert nn1.checksum == nn2.checksum @@ -345,7 +363,7 @@ def test_odir_init(): """checking if output_dir is available for a task without init before running the task """ - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) assert nn.output_dir @@ -355,7 +373,7 @@ def test_odir_init(): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1(plugin_dask_opt, tmp_path): """task without splitter""" - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -384,7 +402,7 @@ def test_task_nostate_1(plugin_dask_opt, tmp_path): def test_task_nostate_1_call(): """task without splitter""" - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) nn() # checking the results results = nn.result() @@ -396,7 +414,7 @@ def test_task_nostate_1_call(): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): """task without splitter""" - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -414,7 +432,7 @@ def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): """task without splitter""" - nn = FunAddTwo(name="NA", a=3) + nn = FunAddTwo(a=3) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -430,7 +448,7 @@ def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): def test_task_nostate_1_call_updateinp(): """task without splitter""" - nn = FunAddTwo(name="NA", a=30) + nn = FunAddTwo(a=30) # updating input when calling the node nn(a=3) @@ -443,7 +461,7 @@ def test_task_nostate_1_call_updateinp(): def test_task_nostate_2(plugin, tmp_path): """task with a list as an input, but no splitter""" - nn = Moment(name="NA", n=3, lst=[2, 3, 4]) + nn = Moment(n=3, lst=[2, 3, 4]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [3]) assert np.allclose(nn.inputs.lst, [2, 3, 4]) @@ -461,9 +479,9 @@ def test_task_nostate_2(plugin, tmp_path): def test_task_nostate_3(plugin, tmp_path): """task with a dictionary as an 
input""" - nn = FunDict(name="NA", d={"a": "ala", "b": "bala"}) + nn = FunDict(d={"a": "ala", "b": "bala"}) nn.cache_dir = tmp_path - assert nn.inputs.d == {"a": "ala", "b": "bala"} + assert nn.d == {"a": "ala", "b": "bala"} with Submitter(worker=plugin) as sub: sub(nn) @@ -481,7 +499,7 @@ def test_task_nostate_4(plugin, tmp_path): with open(file1, "w") as f: f.write("hello from pydra\n") - nn = FunFile(name="NA", filename=file1) + nn = FunFile(filename=file1) nn.cache_dir = tmp_path with Submitter(plugin) as sub: @@ -504,7 +522,7 @@ def test_task_nostate_5(tmp_path): with open(file2, "w") as f: f.write("from pydra\n") - nn = FunFileList(name="NA", filename_list=[file1, file2]) + nn = FunFileList(filename_list=[file1, file2]) nn() @@ -517,7 +535,7 @@ def test_task_nostate_5(tmp_path): def test_task_nostate_6(): """checking if the function gets the None value""" - nn = FunAddVarNone(name="NA", a=2, b=None) + nn = FunAddVarNone(a=2, b=None) assert nn.inputs.b is None nn() assert nn.result().output.out == 2 @@ -525,7 +543,7 @@ def test_task_nostate_6(): def test_task_nostate_6a_exception(): """checking if the function gets the attr.Nothing value""" - nn = FunAddVarNone(name="NA", a=2) + nn = FunAddVarNone(a=2) assert nn.inputs.b is attr.NOTHING with pytest.raises(TypeError) as excinfo: nn() @@ -534,8 +552,8 @@ def test_task_nostate_6a_exception(): def test_task_nostate_7(): """using the default value from the function for b input""" - nn = FunAddVarDefault(name="NA", a=2) - assert nn.inputs.b == 1 + nn = FunAddVarDefault(a=2) + assert nn.b == 1 nn() assert nn.result().output.out == 3 @@ -548,7 +566,7 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): """task with provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -567,7 +585,7 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): cache_dir = "test_task_nostate" (tmp_path / cache_dir).mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -592,11 +610,11 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn) - nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn2) @@ -620,11 +638,11 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) with Submitter(worker=plugin) as sub: sub(nn2, rerun=True) @@ -647,10 +665,10 @@ def test_task_nostate_cachelocations_nosubmitter(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) nn() - nn2 
= FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) nn2() # checking the results @@ -673,10 +691,10 @@ def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) nn() - nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) nn2(rerun=True) # checking the results @@ -702,11 +720,11 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir) + nn = FunAddTwo(a=3, cache_dir=cache_dir) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = FunAddTwo(name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) # updating cache location to non-existing dir with Submitter(worker=plugin) as sub: sub(nn2, cache_locations=cache_dir1) @@ -731,12 +749,12 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): if input_type == "array": a_in = np.array(a_in) - nn = FunAddTwo(name="NA").split(splitter="a", a=a_in) + nn = FunAddTwo().split("a", a=a_in) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() with Submitter(worker=plugin_dask_opt) as sub: sub(nn) @@ -770,14 +788,14 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): def test_task_state_1a(plugin, tmp_path): """task with the simplest splitter (inputs set separately)""" - nn = FunAddTwo(name="NA") - nn.split(splitter="a", a=[1, 2]) + nn = FunAddTwo() + nn.split("a", a=[1, 2]) nn.inputs.a = StateArray([3, 5]) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() with Submitter(worker=plugin) as sub: sub(nn) @@ -793,11 +811,11 @@ def test_task_state_singl_1(plugin, tmp_path): """Tasks with two inputs and a splitter (no combiner) one input is a single value, the other is in the splitter and combiner """ - nn = FunAddVar(name="NA").split(splitter="a", a=[3, 5], b=10) + nn = FunAddVar().split("a", a=[3, 5], b=10) nn.cache_dir = tmp_path - assert nn.inputs.a == [3, 5] - assert nn.inputs.b == 10 + assert nn.a == [3, 5] + assert nn.b == 10 assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] assert nn.state.splitter_final == "NA.a" @@ -863,11 +881,11 @@ def test_task_state_2( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = FunAddVar(name="NA").split(splitter=splitter, a=a_in, b=b_in) + nn = FunAddVar().split(splitter, a=a_in, b=b_in) nn.cache_dir = tmp_path - assert (nn.inputs.a == np.array([3, 5])).all() - assert (nn.inputs.b == np.array([10, 20])).all() + assert (nn.a == np.array([3, 5])).all() + assert (nn.b == np.array([10, 20])).all() assert nn.state.splitter == state_splitter assert nn.state.splitter_rpn == state_rpn assert nn.state.splitter_final == state_splitter @@ -903,12 +921,12 @@ def test_task_state_2( def test_task_state_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = 
FunAddTwo(name="NA").split(splitter="a", a=[]) + nn = FunAddTwo().split("a", a=[]) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] - assert nn.inputs.a == [] + assert nn.a == [] with Submitter(worker=plugin) as sub: sub(nn) @@ -928,7 +946,7 @@ def test_task_state_4(plugin, input_type, tmp_path): lst_in = [[2, 3, 4], [1, 2, 3]] if input_type == "array": lst_in = np.array(lst_in, dtype=int) - nn = Moment(name="NA", n=3).split(splitter="lst", lst=lst_in) + nn = Moment(n=3).split("lst", lst=lst_in) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) @@ -957,7 +975,7 @@ def test_task_state_4(plugin, input_type, tmp_path): def test_task_state_4a(plugin, tmp_path): """task with a tuple as an input, and a simple splitter""" - nn = Moment(name="NA", n=3).split(splitter="lst", lst=[(2, 3, 4), (1, 2, 3)]) + nn = Moment(n=3).split("lst", lst=[(2, 3, 4), (1, 2, 3)]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) @@ -979,9 +997,7 @@ def test_task_state_4a(plugin, tmp_path): def test_task_state_5(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter""" - nn = Moment(name="NA").split( - splitter=("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) + nn = Moment().split(("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) @@ -1005,9 +1021,7 @@ def test_task_state_5_exception(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter the shapes are not matching, so exception should be raised """ - nn = Moment(name="NA").split( - splitter=("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) + nn = Moment().split(("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3, 3]) @@ -1022,9 +1036,7 @@ def test_task_state_5_exception(plugin, tmp_path): def test_task_state_6(plugin, tmp_path): """ask with a list as an input, and the variable is part of the outer splitter""" - nn = Moment(name="NA").split( - splitter=["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] - ) + nn = Moment().split(["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) @@ -1046,9 +1058,7 @@ def test_task_state_6(plugin, tmp_path): def test_task_state_6a(plugin, tmp_path): """ask with a tuple as an input, and the variable is part of the outer splitter""" - nn = Moment(name="NA").split( - splitter=["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)] - ) + nn = Moment().split(["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)]) nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) @@ -1071,10 +1081,10 @@ def test_task_state_6a(plugin, tmp_path): @pytest.mark.flaky(reruns=2) # when dask def test_task_state_comb_1(plugin_dask_opt, tmp_path): """task with the simplest splitter and combiner""" - nn = FunAddTwo(name="NA").split(a=[3, 5], splitter="a").combine(combiner="a") + nn = FunAddTwo().split(a=[3, 5]).combine(combiner="a") nn.cache_dir = tmp_path - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -1207,14 +1217,10 @@ def test_task_state_comb_2( tmp_path, ): """Tasks with scalar and outer splitters and partial or full combiners""" - nn = ( - FunAddVar(name="NA") - .split(a=[3, 5], b=[10, 20], splitter=splitter) - .combine(combiner=combiner) - ) + nn = 
FunAddVar().split(splitter, a=[3, 5], b=[10, 20]).combine(combiner=combiner) nn.cache_dir = tmp_path - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() assert nn.state.splitter == state_splitter assert nn.state.splitter_rpn == state_rpn @@ -1256,11 +1262,11 @@ def test_task_state_comb_singl_1(plugin, tmp_path): """Tasks with two inputs; one input is a single value, the other is in the splitter and combiner """ - nn = FunAddVar(name="NA").split(splitter="a", a=[3, 5], b=10).combine(combiner="a") + nn = FunAddVar().split("a", a=[3, 5], b=10).combine(combiner="a") nn.cache_dir = tmp_path - assert nn.inputs.a == [3, 5] - assert nn.inputs.b == 10 + assert nn.a == [3, 5] + assert nn.b == 10 assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] assert nn.state.combiner == ["NA.a"] @@ -1284,12 +1290,12 @@ def test_task_state_comb_singl_1(plugin, tmp_path): def test_task_state_comb_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" - nn = FunAddTwo(name="NA").split(splitter="a", a=[]).combine(combiner=["a"]) + nn = FunAddTwo().split("a", a=[]).combine(combiner=["a"]) nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] - assert nn.inputs.a == [] + assert nn.a == [] with Submitter(worker=plugin) as sub: sub(nn) @@ -1309,11 +1315,7 @@ def test_task_state_comb_order(): """ # single combiner "a" - will create two lists, first one for b=3, second for b=5 - nn_a = ( - FunAddVar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner="a") - ) + nn_a = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="a") assert nn_a.state.combiner == ["NA.a"] results_a = nn_a() @@ -1321,11 +1323,7 @@ def test_task_state_comb_order(): assert combined_results_a == [[13, 23], [15, 25]] # single combiner "b" - will create two lists, first one for a=10, second for a=20 - nn_b = ( - FunAddVar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner="b") - ) + nn_b = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="b") assert nn_b.state.combiner == ["NA.b"] results_b = nn_b() @@ -1334,9 +1332,7 @@ def test_task_state_comb_order(): # combiner with both fields ["a", "b"] - will create one list nn_ab = ( - FunAddVar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner=["a", "b"]) + FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["a", "b"]) ) assert nn_ab.state.combiner == ["NA.a", "NA.b"] @@ -1348,9 +1344,7 @@ def test_task_state_comb_order(): # combiner with both fields ["b", "a"] - will create the same list as nn_ab # no difference in the order for setting combiner nn_ba = ( - FunAddVar(name="NA") - .split(splitter=["a", "b"], a=[10, 20], b=[3, 5]) - .combine(combiner=["b", "a"]) + FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["b", "a"]) ) assert nn_ba.state.combiner == ["NA.b", "NA.a"] @@ -1450,10 +1444,10 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): """task with a state and provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = FunAddTwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(cache_dir=cache_dir).split("a", a=[3, 5]) assert nn.state.splitter == "NA.a" - assert (nn.inputs.a == np.array([3, 5])).all() + assert (nn.a == np.array([3, 5])).all() with Submitter(worker=plugin_dask_opt) as sub: sub(nn) @@ -1475,13 
+1469,13 @@ def test_task_state_cachelocations(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(a=3, cache_dir=cache_dir).split("a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = FunAddTwo( - name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir - ).split(splitter="a", a=[3, 5]) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir).split( + "a", a=[3, 5] + ) with Submitter(worker=plugin) as sub: sub(nn2) @@ -1506,13 +1500,13 @@ def test_task_state_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(a=3, cache_dir=cache_dir).split("a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = FunAddTwo( - name="NA", a=3, cache_dir=cache_dir2, cache_locations=cache_dir - ).split(splitter="a", a=[3, 5]) + nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir).split( + "a", a=[3, 5] + ) with Submitter(worker=plugin) as sub: sub(nn2, rerun=True) @@ -1541,12 +1535,12 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) + nn = FunAddTwo(cache_dir=cache_dir).split("a", a=[3, 5]) with Submitter(worker=plugin) as sub: sub(nn) - nn2 = FunAddTwo(name="NA", cache_dir=cache_dir2, cache_locations=cache_dir).split( - splitter="a", a=[3, 5] + nn2 = FunAddTwo(cache_dir=cache_dir2, cache_locations=cache_dir).split( + "a", a=[3, 5] ) with Submitter(worker=plugin) as sub: sub(nn2, cache_locations=cache_dir1) @@ -1579,13 +1573,11 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): input2 = input_dir / "input2.txt" input2.write_text("test") - nn = FunFile(name="NA", filename=input1, cache_dir=cache_dir) + nn = FunFile(filename=input1, cache_dir=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn) - nn2 = FunFile( - name="NA", filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir - ) + nn2 = FunFile(filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir) with Submitter(worker=plugin_dask_opt) as sub: sub(nn2) From f2bf301ff5248bbdec70ffa936afe9e52b117fc9 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 27 Feb 2025 11:49:06 +1100 Subject: [PATCH 289/342] updated tests in test_node_task (most are passing) --- pydra/engine/core.py | 11 +- pydra/engine/specs.py | 12 +- pydra/engine/submitter.py | 29 +- pydra/engine/tests/test_node_task.py | 1103 +++++++++++--------------- 4 files changed, 508 insertions(+), 647 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index b2d9e5324d..d2c1ce8938 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -371,7 +371,7 @@ def run(self, rerun: bool = False): self.audit.audit_task(task=self) try: self.audit.monitor() - self.definition._run(self) + self.definition._run(self, rerun) result.outputs = self.definition.Outputs._from_task(self) except Exception: etype, eval, etr = sys.exc_info() @@ -425,7 +425,7 @@ async def run_async(self, rerun: bool = False) -> Result: self.audit.start_audit(odir=self.output_dir) try: self.audit.monitor() - await self.definition._run_async(self) + await self.definition._run_async(self, rerun) result.outputs = self.definition.Outputs._from_task(self) except Exception: 
etype, eval, etr = sys.exc_info() @@ -628,8 +628,7 @@ def clear_cache( @classmethod def construct( - cls, - definition: WorkflowDef[WorkflowOutputsType], + cls, definition: WorkflowDef[WorkflowOutputsType], dont_cache: bool = False ) -> Self: """Construct a workflow from a definition, caching the constructed worklow""" @@ -722,8 +721,8 @@ def construct( f"Expected outputs {unset_outputs} to be set by the " f"constructor of {workflow!r}" ) - - cls._constructed_cache[defn_hash][non_lazy_keys][non_lazy_hash] = workflow + if not dont_cache: + cls._constructed_cache[defn_hash][non_lazy_keys][non_lazy_hash] = workflow return workflow diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index c4f8ebc439..fba330841c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -686,7 +686,7 @@ class PythonDef(TaskDef[PythonOutputsType]): _task_type = "python" - def _run(self, task: "Task[PythonDef]") -> None: + def _run(self, task: "Task[PythonDef]", rerun: bool = True) -> None: # Prepare the inputs to the function inputs = attrs_values(self) del inputs["function"] @@ -773,13 +773,13 @@ class WorkflowDef(TaskDef[WorkflowOutputsType]): _constructed = attrs.field(default=None, init=False, repr=False, eq=False) - def _run(self, task: "Task[WorkflowDef]") -> None: + def _run(self, task: "Task[WorkflowDef]", rerun: bool) -> None: """Run the workflow.""" - task.submitter.expand_workflow(task) + task.submitter.expand_workflow(task, rerun) - async def _run_async(self, task: "Task[WorkflowDef]") -> None: + async def _run_async(self, task: "Task[WorkflowDef]", rerun: bool) -> None: """Run the workflow asynchronously.""" - await task.submitter.expand_workflow_async(task) + await task.submitter.expand_workflow_async(task, rerun) def construct(self) -> "Workflow": from pydra.engine.core import Workflow @@ -971,7 +971,7 @@ class ShellDef(TaskDef[ShellOutputsType]): RESERVED_FIELD_NAMES = TaskDef.RESERVED_FIELD_NAMES + ("cmdline",) - def _run(self, task: "Task[ShellDef]") -> None: + def _run(self, task: "Task[ShellDef]", rerun: bool = True) -> None: """Run the shell command.""" task.return_values = task.environment.execute(task) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 90cb77cefa..711322552d 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -58,9 +58,6 @@ class Submitter: The worker to use, by default "cf" environment: Environment, optional The execution environment to use, by default None - rerun : bool, optional - Whether to force the re-computation of the task results even if existing - results are found, by default False cache_locations : list[os.PathLike], optional Alternate cache locations to check for pre-computed results, by default None audit_flags : AuditFlag, optional @@ -81,7 +78,6 @@ class Submitter: cache_dir: os.PathLike worker: Worker environment: "Environment | None" - rerun: bool cache_locations: list[os.PathLike] audit_flags: AuditFlag messengers: ty.Iterable[Messenger] @@ -91,10 +87,10 @@ class Submitter: def __init__( self, + /, cache_dir: os.PathLike | None = None, worker: str | ty.Type[Worker] | Worker | None = "debug", environment: "Environment | None" = None, - rerun: bool = False, cache_locations: list[os.PathLike] | None = None, audit_flags: AuditFlag = AuditFlag.NONE, messengers: ty.Iterable[Messenger] | None = None, @@ -127,7 +123,6 @@ def __init__( self.cache_dir = cache_dir self.cache_locations = cache_locations self.environment = environment if environment is not None else Native() - self.rerun = rerun self.loop = 
get_open_loop() self._own_loop = not self.loop.is_running() if isinstance(worker, Worker): @@ -177,6 +172,7 @@ def __call__( task_def: "TaskDef[OutputType]", hooks: "TaskHooks | None" = None, raise_errors: bool | None = None, + rerun: bool = False, ) -> "Result[OutputType]": """Submitter run function. @@ -190,6 +186,9 @@ def __call__( raise_errors : bool, optional Whether to raise errors, by default True if the 'debug' worker is used, otherwise False + rerun : bool, optional + Whether to force the re-computation of the task results even if existing + results are found, by default False Returns ------- @@ -242,11 +241,9 @@ def Split( try: self.run_start_time = datetime.now() if self.worker.is_async: # Only workflow tasks can be async - self.loop.run_until_complete( - self.worker.run_async(task, rerun=self.rerun) - ) + self.loop.run_until_complete(self.worker.run_async(task, rerun=rerun)) else: - self.worker.run(task, rerun=self.rerun) + self.worker.run(task, rerun=rerun) except Exception as e: msg = ( f"Full crash report for {type(task_def).__name__!r} task is here: " @@ -288,7 +285,7 @@ def __setstate__(self, state): self._worker = WORKERS[self.worker_name](**self.worker_kwargs) self.worker.loop = self.loop - def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: + def expand_workflow(self, workflow_task: "Task[WorkflowDef]", rerun: bool) -> None: """Expands and executes a workflow task synchronously. Typically only used during debugging and testing, as the asynchronous version is more efficient. @@ -305,11 +302,13 @@ def expand_workflow(self, workflow_task: "Task[WorkflowDef]") -> None: tasks = self.get_runnable_tasks(exec_graph) while tasks or any(not n.done for n in exec_graph.nodes): for task in tasks: - self.worker.run(task, rerun=self.rerun) + self.worker.run(task, rerun=rerun) tasks = self.get_runnable_tasks(exec_graph) workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} - async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> None: + async def expand_workflow_async( + self, workflow_task: "Task[WorkflowDef]", rerun: bool + ) -> None: """ Expand and execute a workflow task asynchronously. 
@@ -400,9 +399,9 @@ async def expand_workflow_async(self, workflow_task: "Task[WorkflowDef]") -> Non raise RuntimeError(msg) for task in tasks: if task.is_async: - await self.worker.run_async(task, rerun=self.rerun) + await self.worker.run_async(task, rerun=rerun) else: - task_futures.add(self.worker.run(task, rerun=self.rerun)) + task_futures.add(self.worker.run(task, rerun=rerun)) task_futures = await self.worker.fetch_finished(task_futures) tasks = self.get_runnable_tasks(exec_graph) workflow_task.return_values = {"workflow": wf, "exec_graph": exec_graph} diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 499acea942..49dff68b8a 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -1,13 +1,11 @@ import os import shutil -import attr +import attrs import typing as ty import numpy as np import time -from unittest import mock from pathlib import Path import pytest -import time from fileformats.generic import File from pydra.design import python, workflow @@ -43,18 +41,22 @@ def Identity(a): return a.out -def get_state(task: TaskDef) -> State: +def get_state(task: TaskDef, name="NA") -> State: """helper function to get the state of the task once it has been added to workflow""" identity_workflow = IdentityWorkflow(a=1) - wf = Workflow.construct(identity_workflow) - wf.add(task, name="NA") - node = wf["NA"] + wf = Workflow.construct(identity_workflow, dont_cache=True) + wf.add(task, name=name) + node = wf[name] if node.state: node.state.prepare_states() node.state.prepare_inputs() return node.state +def num_python_cache_dirs(cache_path: Path) -> int: + return len(list(cache_path.glob("python-*"))) + + @pytest.fixture(scope="module") def change_dir(request): orig_dir = os.getcwd() @@ -68,15 +70,6 @@ def move2orig(): request.addfinalizer(move2orig) -# Tests for tasks initializations -def test_task_init_1(): - """task with mandatory arguments only""" - nn = FunAddTwo() - assert isinstance(nn, Task) - assert nn.name == "fun_addtwo" - assert hasattr(nn, "__call__") - - def test_task_init_1a(): with pytest.raises(TypeError): FunAddTwo("NA") @@ -86,8 +79,9 @@ def test_task_init_2(): """task with a name and inputs""" nn = FunAddTwo(a=3) # adding NA to the name of the variable - assert getattr(nn.inputs, "a") == 3 - assert nn.state is None + assert nn.a == 3 + state = get_state(nn) + assert state is None @pytest.mark.parametrize( @@ -154,54 +148,55 @@ def test_task_init_3a( elif input_type == "mixed": a_in = np.array(a_in) nn = FunAddVar().split(splitter, a=a_in, b=b_in) + state = get_state(nn) - assert np.allclose(nn.inputs.a, [3, 5]) - assert np.allclose(nn.inputs.b, [10, 20]) - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn + assert np.allclose(nn.a, [3, 5]) + assert np.allclose(nn.b, [10, 20]) + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == states_ind - assert nn.state.states_val == states_val + assert state.states_ind == states_ind + assert state.states_val == states_val def test_task_init_4(): """task with interface splitter and inputs set in the split method""" nn = FunAddTwo() - nn.split("a", a=[3, 5]) - assert np.allclose(nn.inputs.a, [3, 5]) + nn = nn.split("a", a=[3, 5]) + state = get_state(nn) + assert np.allclose(nn.a, [3, 5]) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert 
state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] def test_task_init_4b(): """updating splitter using overwrite=True""" nn = FunAddTwo() - nn.split("a", a=[1, 2]) - nn.split("a", a=[3, 5], overwrite=True) - assert np.allclose(nn.inputs.a, [3, 5]) + nn = nn.split("a", a=[1, 2]) + nn = nn.split("a", a=[3, 5], overwrite=True) + state = get_state(nn) + assert np.allclose(nn.a, [3, 5]) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] def test_task_init_4c(): """trying to set splitter twice without using overwrite""" nn = FunAddVar().split("b", b=[1, 2]) + state = get_state(nn) with pytest.raises(Exception) as excinfo: nn.split("a", a=[3, 5]) - assert "splitter has been already set" in str(excinfo.value) + assert "Cannot overwrite existing splitter" in str(excinfo.value) - assert nn.state.splitter == "NA.b" + assert state.splitter == "NA.b" def test_task_init_4d(): @@ -209,75 +204,77 @@ def test_task_init_4d(): if the splitter is the same, the exception shouldn't be raised """ nn = FunAddTwo().split("a", a=[3, 5]) - nn.split("a", a=[3, 5]) - assert nn.state.splitter == "NA.a" + nn = nn.split("a", a=[3, 5], overwrite=True) + state = get_state(nn) + assert state.splitter == "NA.a" def test_task_init_5(): """task with inputs, splitter and combiner""" nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + state = get_state(nn) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.b"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.b"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [ + assert state.states_ind == [ {"NA.a": 0, "NA.b": 0}, {"NA.a": 0, "NA.b": 1}, {"NA.a": 1, "NA.b": 0}, {"NA.a": 1, "NA.b": 1}, ] - assert nn.state.states_val == [ + assert state.states_val == [ {"NA.a": 3, "NA.b": 1}, {"NA.a": 3, "NA.b": 2}, {"NA.a": 5, "NA.b": 1}, {"NA.a": 5, "NA.b": 2}, ] - assert nn.state.final_combined_ind_mapping == {0: [0, 1], 1: [2, 3]} + assert state.final_combined_ind_mapping == {0: [0, 1], 1: [2, 3]} def test_task_init_5a(): """updating combiner using overwrite=True""" nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - nn.combine("a", overwrite=True) + nn = nn.combine("a", overwrite=True) + state = get_state(nn) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.a"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.a"] - assert nn.state.splitter_final == "NA.b" - assert nn.state.splitter_rpn_final == ["NA.b"] + assert 
state.splitter_final == "NA.b" + assert state.splitter_rpn_final == ["NA.b"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [ + assert state.states_ind == [ {"NA.a": 0, "NA.b": 0}, {"NA.a": 0, "NA.b": 1}, {"NA.a": 1, "NA.b": 0}, {"NA.a": 1, "NA.b": 1}, ] - assert nn.state.states_val == [ + assert state.states_val == [ {"NA.a": 3, "NA.b": 1}, {"NA.a": 3, "NA.b": 2}, {"NA.a": 5, "NA.b": 1}, {"NA.a": 5, "NA.b": 2}, ] - assert nn.state.final_combined_ind_mapping == {0: [0, 2], 1: [1, 3]} + assert state.final_combined_ind_mapping == {0: [0, 2], 1: [1, 3]} def test_task_init_5b(): """updating combiner without using overwrite""" nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") + state = get_state(nn) with pytest.raises(Exception) as excinfo: nn.combine("a") - assert "combiner has been already set" in str(excinfo.value) + assert "Attempting to overwrite existing combiner" in str(excinfo.value) - assert nn.state.combiner == ["NA.b"] + assert state.combiner == ["NA.b"] def test_task_init_5c(): @@ -285,28 +282,29 @@ def test_task_init_5c(): if the combiner is the same, the exception shouldn't be raised """ nn = FunAddVar().split(["a", "b"], a=[3, 5], b=[1, 2]).combine("b") - nn.combine("b") + state = get_state(nn) + nn = nn.combine("b", overwrite=True) - assert nn.state.splitter == ["NA.a", "NA.b"] - assert nn.state.splitter_rpn == ["NA.a", "NA.b", "*"] - assert nn.state.combiner == ["NA.b"] + assert state.splitter == ["NA.a", "NA.b"] + assert state.splitter_rpn == ["NA.a", "NA.b", "*"] + assert state.combiner == ["NA.b"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] def test_task_init_6(): """task with splitter, but the input is an empty list""" nn = FunAddTwo() - nn.split("a", a=[]) + nn = nn.split("a", a=[]) + state = get_state(nn) assert nn.a == [] - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [] - assert nn.state.states_val == [] + assert state.states_ind == [] + assert state.states_val == [] def test_task_init_7(tmp_path): @@ -320,7 +318,7 @@ def test_task_init_7(tmp_path): f.write("from pydra\n") nn1 = FunFileList(filename_list=[file1, file2]) - output_dir1 = nn1.output_dir + hash1 = nn1._hash # changing the content of the file time.sleep(2) # need the mtime to be different @@ -329,16 +327,16 @@ def test_task_init_7(tmp_path): f.write("from pydra") nn2 = FunFileList(filename_list=[file1, file2]) - output_dir2 = nn2.output_dir + hash2 = nn2._hash # the checksum should be different - content of file2 is different - assert output_dir1.name != output_dir2.name + assert hash1 != hash2 def test_task_init_8(): - """task without setting the input, the value should be set to attr.NOTHING""" + """task without setting the input, the value should be set to attrs.NOTHING""" nn = FunAddTwo() - assert nn.inputs.a is attr.NOTHING + assert nn.a is attrs.NOTHING def test_task_init_9(): @@ -349,22 +347,14 @@ def test_task_init_9(): nn2 = FunAddVarDefault(a=2, b=1) assert nn2.b == 1 # both tasks should have the same checksum - assert nn1.checksum == nn2.checksum + assert nn1._hash == nn2._hash -def test_task_error(): - func = FunDiv(name="div", a=1, b=0) +def test_task_error(tmp_path): + func = FunDiv(a=1, b=0) with pytest.raises(ZeroDivisionError): - func() - assert 
(func.output_dir / "_error.pklz").exists() - - -def test_odir_init(): - """checking if output_dir is available for a task without init - before running the task - """ - nn = FunAddTwo(a=3) - assert nn.output_dir + func(cache_dir=tmp_path) + assert (next(tmp_path.iterdir()) / "_error.pklz").exists() # Tests for tasks without state (i.e. no splitter) @@ -374,123 +364,104 @@ def test_odir_init(): def test_task_nostate_1(plugin_dask_opt, tmp_path): """task without splitter""" nn = FunAddTwo(a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 5 - # checking the return_inputs option, either is return_inputs is True, or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") - assert results_verb[0] == results_verb_val[0] == {"NA.a": 3} - assert results_verb[1].output.out == results_verb_val[1].output.out == 5 - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - assert results_verb_ind[0] == {"NA.a": None} - assert results_verb_ind[1].output.out == 5 + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() -def test_task_nostate_1_call(): +def test_task_nostate_1_call(tmp_path): """task without splitter""" nn = FunAddTwo(a=3) - nn() + with Submitter(cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): """task without splitter""" nn = FunAddTwo(a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None - with Submitter(worker=plugin_dask_opt) as sub: - nn(submitter=sub) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() @pytest.mark.flaky(reruns=2) # when dask def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): """task without splitter""" nn = FunAddTwo(a=3) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None - - nn(plugin=plugin_dask_opt) - - # checking the results - results = nn.result() - assert results.output.out == 5 - # checking the output_dir - assert nn.output_dir.exists() + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None -def test_task_nostate_1_call_updateinp(): - """task without splitter""" - nn = FunAddTwo(a=30) - # updating input when calling the node - nn(a=3) + with Submitter(cache_dir=tmp_path, worker=plugin_dask_opt) as sub: + results = sub(nn) # checking the results - 
results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_2(plugin, tmp_path): """task with a list as an input, but no splitter""" nn = Moment(n=3, lst=[2, 3, 4]) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.n, [3]) - assert np.allclose(nn.inputs.lst, [2, 3, 4]) - assert nn.state is None - with Submitter(worker=plugin) as sub: - sub(nn) + assert np.allclose(nn.n, [3]) + assert np.allclose(nn.lst, [2, 3, 4]) + state = get_state(nn) + assert state is None + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 33 + + assert results.outputs.out == 33 # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_3(plugin, tmp_path): """task with a dictionary as an input""" nn = FunDict(d={"a": "ala", "b": "bala"}) - nn.cache_dir = tmp_path + assert nn.d == {"a": "ala", "b": "bala"} - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == "a:ala_b:bala" + + assert results.outputs.out == "a:ala_b:bala" # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_4(plugin, tmp_path): @@ -500,16 +471,15 @@ def test_task_nostate_4(plugin, tmp_path): f.write("hello from pydra\n") nn = FunFile(filename=file1) - nn.cache_dir = tmp_path - with Submitter(plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == "hello from pydra\n" + + assert results.outputs.out == "hello from pydra\n" # checking the output_dir - assert nn.output_dir.exists() + assert results.output_dir.exists() def test_task_nostate_5(tmp_path): @@ -524,27 +494,25 @@ def test_task_nostate_5(tmp_path): nn = FunFileList(filename_list=[file1, file2]) - nn() + outputs = nn() # checking the results - results = nn.result() - assert results.output.out == "hello from pydra\n" - # checking the output_dir - assert nn.output_dir.exists() + + assert outputs.out == "hello from pydra\n" def test_task_nostate_6(): """checking if the function gets the None value""" nn = FunAddVarNone(a=2, b=None) - assert nn.inputs.b is None - nn() - assert nn.result().output.out == 2 + assert nn.b is None + outputs = nn() + assert outputs.out == 2 def test_task_nostate_6a_exception(): - """checking if the function gets the attr.Nothing value""" + """checking if the function gets the attrs.Nothing value""" nn = FunAddVarNone(a=2) - assert nn.inputs.b is attr.NOTHING + assert nn.b is attrs.NOTHING with pytest.raises(TypeError) as excinfo: nn() assert "unsupported" in str(excinfo.value) @@ -554,8 +522,8 @@ def test_task_nostate_7(): """using the default value from the function for b input""" nn = FunAddVarDefault(a=2) assert nn.b == 1 - nn() - assert nn.result().output.out == 3 + outputs = nn() + assert outputs.out == 3 # Testing caching for tasks without states @@ -566,16 +534,17 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): """task with provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - assert 
np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) + state = get_state(nn) + assert np.allclose(nn.a, [3]) + assert state is None - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 @pytest.mark.flaky(reruns=2) # when dask @@ -585,16 +554,17 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): cache_dir = "test_task_nostate" (tmp_path / cache_dir).mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - assert np.allclose(nn.inputs.a, [3]) - assert nn.state is None + nn = FunAddTwo(a=3) + assert np.allclose(nn.a, [3]) + state = get_state(nn) + assert state is None - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) # checking the results - results = nn.result() - assert results.output.out == 5 + + assert results.outputs.out == 5 shutil.rmtree(cache_dir) @@ -610,21 +580,22 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn2) + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert results.output_dir == results2.output_dir def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): @@ -638,21 +609,23 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - with Submitter(worker=plugin) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: + results = sub(nn) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(worker=plugin) as sub: - sub(nn2, rerun=True) + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2, rerun=True) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if the second task rerun the interface - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert results.output_dir.exists() + assert results2.output_dir.exists() def test_task_nostate_cachelocations_nosubmitter(tmp_path): @@ -665,19 +638,19 @@ def test_task_nostate_cachelocations_nosubmitter(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - nn() + nn = FunAddTwo(a=3) + nn(cache_dir=cache_dir) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - nn2() + nn2 = FunAddTwo(a=3) + outputs2 = nn2(cache_dir=cache_dir2, cache_locations=cache_dir) # 
checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert outputs2.out == 5 # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert num_python_cache_dirs(cache_dir) == 1 + assert not num_python_cache_dirs(cache_dir2) def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): @@ -691,19 +664,19 @@ def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - nn() + nn = FunAddTwo(a=3) + nn(cache_dir=cache_dir) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) - nn2(rerun=True) + nn2 = FunAddTwo(a=3) + outputs2 = nn2(rerun=True, cache_dir=cache_dir2, cache_locations=cache_dir) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert outputs2.out == 5 # checking if the second task run the interface again - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert num_python_cache_dirs(cache_dir) == 1 + assert num_python_cache_dirs(cache_dir2) def test_task_nostate_cachelocations_updated(plugin, tmp_path): @@ -720,22 +693,29 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir) - with Submitter(worker=plugin) as sub: - sub(nn) + nn = FunAddTwo(a=3) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: + results = sub(nn) + + nn2 = FunAddTwo(a=3) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results1 = sub(nn2) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir) # updating cache location to non-existing dir - with Submitter(worker=plugin) as sub: - sub(nn2, cache_locations=cache_dir1) + with Submitter( + worker=plugin, cache_locations=cache_dir1, cache_dir=tmp_path + ) as sub: + results2 = sub(nn2) # checking the results - results2 = nn2.result() - assert results2.output.out == 5 + + assert results2.outputs.out == 5 # checking if both tasks run interface - assert nn.output_dir.exists() - assert nn2.output_dir.exists() + assert results.output_dir == results1.output_dir + assert results.output_dir != results2.output_dir # Tests for tasks with states (i.e. 
with splitter) @@ -750,89 +730,68 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): a_in = np.array(a_in) nn = FunAddTwo().split("a", a=a_in) - nn.cache_dir = tmp_path + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] assert (nn.a == np.array([3, 5])).all() - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] - for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - expected_ind = [({"NA.a": 0}, 5), ({"NA.a": 1}, 7)] - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == res[1] def test_task_state_1a(plugin, tmp_path): """task with the simplest splitter (inputs set separately)""" nn = FunAddTwo() - nn.split("a", a=[1, 2]) - nn.inputs.a = StateArray([3, 5]) - nn.cache_dir = tmp_path + nn = nn.split("a", a=[1, 2]) + nn.a = StateArray([3, 5]) + + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] assert (nn.a == np.array([3, 5])).all() - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert results.outputs.out[i] == res[1] def test_task_state_singl_1(plugin, tmp_path): """Tasks with two inputs and a splitter (no combiner) one input is a single value, the other is in the splitter and combiner """ - nn = FunAddVar().split("a", a=[3, 5], b=10) - nn.cache_dir = tmp_path + nn = FunAddVar(b=10).split("a", a=[3, 5]) + state = get_state(nn) assert nn.a == [3, 5] assert nn.b == 10 - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.splitter_final == "NA.a" - assert nn.state.splitter_rpn_final == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert state.splitter_final == "NA.a" + assert state.splitter_rpn_final == ["NA.a"] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results expected = [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)] - results = nn.result() + for i, res in enumerate(expected): - assert results[i].output.out == 
res[1] + assert results.outputs.out[i] == res[1] # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.output_dir.exists() @pytest.mark.parametrize( @@ -882,62 +841,41 @@ def test_task_state_2( elif input_type == "mixed": a_in = np.array(a_in) nn = FunAddVar().split(splitter, a=a_in, b=b_in) - nn.cache_dir = tmp_path + state = get_state(nn) assert (nn.a == np.array([3, 5])).all() assert (nn.b == np.array([10, 20])).all() - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn - assert nn.state.splitter_final == state_splitter - assert nn.state.splitter_rpn_final == state_rpn + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn + assert state.splitter_final == state_splitter + assert state.splitter_rpn_final == state_rpn - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() - for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == res[1] def test_task_state_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" nn = FunAddTwo().split("a", a=[]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] assert nn.a == [] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + expected = [] for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the output_dir - assert nn.output_dir == [] + assert results.outputs.out[i] == res[1] @pytest.mark.parametrize("input_type", ["list", "array"]) @@ -947,74 +885,62 @@ def test_task_state_4(plugin, input_type, tmp_path): if input_type == "array": lst_in = np.array(lst_in, dtype=int) nn = Moment(n=3).split("lst", lst=lst_in) - nn.cache_dir = tmp_path + state = get_state(nn) - assert np.allclose(nn.inputs.n, 3) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == "NA.lst" + assert np.allclose(nn.n, 3) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == "NA.lst" - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking that split is done across dim 0 - el_0 = nn.state.states_val[0]["NA.lst"] + el_0 = state.states_val[0]["NA.lst"] if input_type == "list": assert 
el_0 == [2, 3, 4] elif input_type == "array": assert el_0 == [2, 3, 4] # checking the results - results = nn.result() + for i, expected in enumerate([33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_4a(plugin, tmp_path): """task with a tuple as an input, and a simple splitter""" nn = Moment(n=3).split("lst", lst=[(2, 3, 4), (1, 2, 3)]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert np.allclose(nn.inputs.n, 3) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == "NA.lst" + assert np.allclose(nn.n, 3) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == "NA.lst" - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + for i, expected in enumerate([33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_5(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter""" nn = Moment().split(("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ("NA.n", "NA.lst") + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ("NA.n", "NA.lst") - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + for i, expected in enumerate([3, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_5_exception(plugin, tmp_path): @@ -1022,112 +948,81 @@ def test_task_state_5_exception(plugin, tmp_path): the shapes are not matching, so exception should be raised """ nn = Moment().split(("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]]) - nn.cache_dir = tmp_path - assert np.allclose(nn.inputs.n, [1, 3, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ("NA.n", "NA.lst") + assert np.allclose(nn.n, [1, 3, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: - sub(nn) + get_state(nn) + assert "shape" in str(excinfo.value) def test_task_state_6(plugin, tmp_path): """ask with a list as an input, and the variable is part of the outer splitter""" nn = Moment().split(["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ["NA.n", "NA.lst"] + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ["NA.n", "NA.lst"] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + 
for i, expected in enumerate([3, 2, 33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected def test_task_state_6a(plugin, tmp_path): """ask with a tuple as an input, and the variable is part of the outer splitter""" nn = Moment().split(["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert np.allclose(nn.inputs.n, [1, 3]) - assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) - assert nn.state.splitter == ["NA.n", "NA.lst"] + assert np.allclose(nn.n, [1, 3]) + assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) + assert state.splitter == ["NA.n", "NA.lst"] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + for i, expected in enumerate([3, 2, 33, 12]): - assert results[i].output.out == expected - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out[i] == expected @pytest.mark.flaky(reruns=2) # when dask def test_task_state_comb_1(plugin_dask_opt, tmp_path): """task with the simplest splitter and combiner""" nn = FunAddTwo().split(a=[3, 5]).combine(combiner="a") - nn.cache_dir = tmp_path + state = get_state(nn) assert (nn.a == np.array([3, 5])).all() - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.combiner == ["NA.a"] - assert nn.state.splitter_final is None - assert nn.state.splitter_rpn_final == [] + assert state.splitter == ["NA.a"] + assert state.splitter_rpn == ["NA.a"] + assert state.combiner == ["NA.a"] + assert state.splitter_final is None + assert state.splitter_rpn_final == [] - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + results = sub(nn) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] + assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] + assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] # checking the results - results = nn.result() - # fully combined (no nested list) - combined_results = [res.output.out for res in results] - assert combined_results == [5, 7] - expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] - expected_ind = [({"NA.a": 0}, 5), ({"NA.a": 1}, 7)] - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) - results_verb_val = nn.result(return_inputs="val") - for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = nn.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + # fully combined (no nested list) + assert results.outputs.out == [5, 7] @pytest.mark.parametrize( "splitter, combiner, state_splitter, state_rpn, state_combiner, state_combiner_all, " 
- "state_splitter_final, state_rpn_final, expected, expected_val", + "state_splitter_final, state_rpn_final, expected", # , expected_val", [ ( ("a", "b"), @@ -1139,7 +1034,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 25], - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], ), ( ("a", "b"), @@ -1151,7 +1046,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 25], - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 20}, 25)], ), ( ["a", "b"], @@ -1163,10 +1058,10 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): "NA.b", ["NA.b"], [[13, 15], [23, 25]], - [ - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)], - [({"NA.a": 3, "NA.b": 20}, 23), ({"NA.a": 5, "NA.b": 20}, 25)], - ], + # [ + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)], + # [({"NA.a": 3, "NA.b": 20}, 23), ({"NA.a": 5, "NA.b": 20}, 25)], + # ], ), ( ["a", "b"], @@ -1178,10 +1073,10 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): "NA.a", ["NA.a"], [[13, 23], [15, 25]], - [ - [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 3, "NA.b": 20}, 23)], - [({"NA.a": 5, "NA.b": 10}, 15), ({"NA.a": 5, "NA.b": 20}, 25)], - ], + # [ + # [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 3, "NA.b": 20}, 23)], + # [({"NA.a": 5, "NA.b": 10}, 15), ({"NA.a": 5, "NA.b": 20}, 25)], + # ], ), ( ["a", "b"], @@ -1193,12 +1088,12 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): None, [], [13, 23, 15, 25], - [ - ({"NA.a": 3, "NA.b": 10}, 13), - ({"NA.a": 3, "NA.b": 20}, 23), - ({"NA.a": 5, "NA.b": 10}, 15), - ({"NA.a": 5, "NA.b": 20}, 25), - ], + # [ + # ({"NA.a": 3, "NA.b": 10}, 13), + # ({"NA.a": 3, "NA.b": 20}, 23), + # ({"NA.a": 5, "NA.b": 10}, 15), + # ({"NA.a": 5, "NA.b": 20}, 25), + # ], ), ], ) @@ -1213,100 +1108,84 @@ def test_task_state_comb_2( state_splitter_final, state_rpn_final, expected, - expected_val, + # expected_val, tmp_path, ): """Tasks with scalar and outer splitters and partial or full combiners""" nn = FunAddVar().split(splitter, a=[3, 5], b=[10, 20]).combine(combiner=combiner) - nn.cache_dir = tmp_path + state = get_state(nn) assert (nn.a == np.array([3, 5])).all() - assert nn.state.splitter == state_splitter - assert nn.state.splitter_rpn == state_rpn - assert nn.state.combiner == state_combiner + assert state.splitter == state_splitter + assert state.splitter_rpn == state_rpn + assert state.combiner == state_combiner - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) - assert nn.state.splitter_final == state_splitter_final - assert nn.state.splitter_rpn_final == state_rpn_final - assert set(nn.state.current_combiner_all) == set(state_combiner_all) + assert state.splitter_final == state_splitter_final + assert state.splitter_rpn_final == state_rpn_final + assert set(state.current_combiner_all) == set(state_combiner_all) # checking the results - results = nn.result() + # checking the return_inputs option, either return_inputs is True or "val", # it should give values of inputs that corresponds to the specific element - results_verb = nn.result(return_inputs=True) + # results_verb = nn.result(return_inputs=True) - if nn.state.splitter_rpn_final: + if state.splitter_rpn_final: for i, res in enumerate(expected): - assert [res.output.out for res in results[i]] == res + assert results.outputs.out == res # 
results_verb - for i, res_l in enumerate(expected_val): - for j, res in enumerate(res_l): - assert (results_verb[i][j][0], results_verb[i][j][1].output.out) == res + # for i, res_l in enumerate(expected_val): + # for j, res in enumerate(res_l): + # assert (results_verb[i][j][0], results_verb[i][j][1].output.out) == res # if the combiner is full expected is "a flat list" else: - assert [res.output.out for res in results] == expected - for i, res in enumerate(expected_val): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out == expected + # for i, res in enumerate(expected_val): + # assert (results_verb[i][0], results_verb[i][1].output.out) == res def test_task_state_comb_singl_1(plugin, tmp_path): """Tasks with two inputs; one input is a single value, the other is in the splitter and combiner """ - nn = FunAddVar().split("a", a=[3, 5], b=10).combine(combiner="a") - nn.cache_dir = tmp_path + nn = FunAddVar(b=10).split("a", a=[3, 5]).combine(combiner="a") + state = get_state(nn) assert nn.a == [3, 5] assert nn.b == 10 - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - assert nn.state.combiner == ["NA.a"] - assert nn.state.splitter_final is None - assert nn.state.splitter_rpn_final == [] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] + assert state.combiner == ["NA.a"] + assert state.splitter_final is None + assert state.splitter_rpn_final == [] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) - # checking the results - expected = ({}, [13, 15]) - results = nn.result() - # full combiner, no nested list - combined_results = [res.output.out for res in results] - assert combined_results == expected[1] - # checking the output_dir - assert nn.output_dir - for odir in nn.output_dir: - assert odir.exists() + assert results.outputs.out == [13, 15] def test_task_state_comb_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" nn = FunAddTwo().split("a", a=[]).combine(combiner=["a"]) - nn.cache_dir = tmp_path + state = get_state(nn) - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] + assert state.splitter == "NA.a" + assert state.splitter_rpn == ["NA.a"] assert nn.a == [] - with Submitter(worker=plugin) as sub: - sub(nn) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + results = sub(nn) # checking the results - results = nn.result() + expected = [] for i, res in enumerate(expected): - assert results[i].output.out == res[1] - # checking the output_dir - assert nn.output_dir == [] + assert results.outputs.out[i] == res[1] def test_task_state_comb_order(): @@ -1316,41 +1195,42 @@ def test_task_state_comb_order(): # single combiner "a" - will create two lists, first one for b=3, second for b=5 nn_a = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="a") - assert nn_a.state.combiner == ["NA.a"] + state_a = get_state(nn_a) + assert state_a.combiner == ["NA.a"] - results_a = nn_a() - combined_results_a = [[res.output.out for res in res_l] for res_l in results_a] - assert combined_results_a == [[13, 23], [15, 25]] + outputs = nn_a() + # combined_results_a = [[res.output.out for res in res_l] for res_l in results_a] + assert outputs.out == [[13, 23], [15, 25]] # single combiner "b" - will create two lists, first one for a=10, second 
for a=20 nn_b = FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner="b") - assert nn_b.state.combiner == ["NA.b"] + state_b = get_state(nn_b) + assert state_b.combiner == ["NA.b"] - results_b = nn_b() - combined_results_b = [[res.output.out for res in res_l] for res_l in results_b] - assert combined_results_b == [[13, 15], [23, 25]] + outputs_b = nn_b() + # combined_results_b = [[res.output.out for res in res_l] for res_l in results_b] + assert outputs_b.out == [[13, 15], [23, 25]] # combiner with both fields ["a", "b"] - will create one list nn_ab = ( FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["a", "b"]) ) - assert nn_ab.state.combiner == ["NA.a", "NA.b"] + state_ab = get_state(nn_ab) + assert state_ab.combiner == ["NA.a", "NA.b"] - results_ab = nn_ab() - # full combiner, no nested list - combined_results_ab = [res.output.out for res in results_ab] - assert combined_results_ab == [13, 15, 23, 25] + outputs_ab = nn_ab() + assert outputs_ab.out == [13, 15, 23, 25] # combiner with both fields ["b", "a"] - will create the same list as nn_ab # no difference in the order for setting combiner nn_ba = ( FunAddVar().split(["a", "b"], a=[10, 20], b=[3, 5]).combine(combiner=["b", "a"]) ) - assert nn_ba.state.combiner == ["NA.b", "NA.a"] + state_ba = get_state(nn_ba) + assert state_ba.combiner == ["NA.b", "NA.a"] - results_ba = nn_ba() - combined_results_ba = [res.output.out for res in results_ba] - assert combined_results_ba == [13, 15, 23, 25] + outputs_ba = nn_ba() + assert outputs_ba.out == [13, 15, 23, 25] # Testing with container dimensions for the input @@ -1359,30 +1239,22 @@ def test_task_state_comb_order(): def test_task_state_contdim_1(tmp_path): """task with a spliter and container dimension for one of the value""" task_4var = Op4Var( - name="op_4var", a="a1", - cache_dir=tmp_path, - ) - task_4var.split( + ).split( ("b", ["c", "d"]), b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], cont_dim={"b": 2}, ) - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3].output.out == "a1 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert outputs.out[3] == "a1 b4 c2 d2" def test_task_state_contdim_2(tmp_path): """task with a splitter and container dimension for one of the value""" - task_4var = Op4Var( - name="op_4var", - cache_dir=tmp_path, - ) - task_4var.split( + task_4var = Op4Var().split( ["a", ("b", ["c", "d"])], cont_dim={"b": 2}, a=["a1", "a2"], @@ -1390,50 +1262,46 @@ def test_task_state_contdim_2(tmp_path): c=["c1", "c2"], d=["d1", "d2"], ) - task_4var() - res = task_4var.result() - assert len(res) == 8 - assert res[7].output.out == "a2 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 8 + assert outputs.out[7] == "a2 b4 c2 d2" def test_task_state_comb_contdim_1(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = Op4Var( - name="op_4var", - a="a1", - cache_dir=tmp_path, + task_4var = ( + Op4Var(a="a1") + .split( + ("b", ["c", "d"]), + cont_dim={"b": 2}, + b=[["b1", "b2"], ["b3", "b4"]], + c=["c1", "c2"], + d=["d1", "d2"], + ) + .combine("b") ) - task_4var.split( - ("b", ["c", "d"]), - cont_dim={"b": 2}, - b=[["b1", "b2"], ["b3", "b4"]], - c=["c1", "c2"], - d=["d1", "d2"], - ).combine("b") - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3].output.out == "a1 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert 
outputs.out[3] == "a1 b4 c2 d2" def test_task_state_comb_contdim_2(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" - task_4var = Op4Var( - name="op_4var", - cache_dir=tmp_path, + task_4var = ( + Op4Var() + .split( + ["a", ("b", ["c", "d"])], + a=["a1", "a2"], + b=[["b1", "b2"], ["b3", "b4"]], + c=["c1", "c2"], + d=["d1", "d2"], + cont_dim={"b": 2}, + ) + .combine("a") ) - task_4var.split( - ["a", ("b", ["c", "d"])], - a=["a1", "a2"], - b=[["b1", "b2"], ["b3", "b4"]], - c=["c1", "c2"], - d=["d1", "d2"], - cont_dim={"b": 2}, - ).combine("a") - task_4var() - res = task_4var.result() - assert len(res) == 4 - assert res[3][1].output.out == "a2 b4 c2 d2" + outputs = task_4var(cache_dir=tmp_path) + assert len(outputs.out) == 4 + assert outputs.out[3][1] == "a2 b4 c2 d2" # Testing caching for tasks with states @@ -1444,19 +1312,20 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): """task with a state and provided cache_dir using pytest tmp_path""" cache_dir = tmp_path / "test_task_nostate" cache_dir.mkdir() - nn = FunAddTwo(cache_dir=cache_dir).split("a", a=[3, 5]) + nn = FunAddTwo().split("a", a=[3, 5]) + state = get_state(nn) - assert nn.state.splitter == "NA.a" + assert state.splitter == "NA.a" assert (nn.a == np.array([3, 5])).all() - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) # checking the results - results = nn.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results[i].output.out == res[1] + assert results.outputs.out[i] == res[1] def test_task_state_cachelocations(plugin, tmp_path): @@ -1469,24 +1338,24 @@ def test_task_state_cachelocations(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir).split("a", a=[3, 5]) - with Submitter(worker=plugin) as sub: + nn = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir).split( - "a", a=[3, 5] - ) - with Submitter(worker=plugin) as sub: - sub(nn2) + nn2 = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) # checking the results - results2 = nn2.result() expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] - assert all([dir.exists() for dir in nn.output_dir]) - assert not any([dir.exists() for dir in nn2.output_dir]) + # Would ideally check for all nodes of the workflows + assert num_python_cache_dirs(cache_dir) == 2 + assert not num_python_cache_dirs(cache_dir2) def test_task_state_cachelocations_forcererun(plugin, tmp_path): @@ -1500,25 +1369,25 @@ def test_task_state_cachelocations_forcererun(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(a=3, cache_dir=cache_dir).split("a", a=[3, 5]) - with Submitter(worker=plugin) as sub: + nn = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = FunAddTwo(a=3, cache_dir=cache_dir2, cache_locations=cache_dir).split( - "a", a=[3, 5] - ) - with Submitter(worker=plugin) as sub: - sub(nn2, rerun=True) + nn2 = FunAddTwo(a=3).split("a", a=[3, 5]) + with Submitter( + worker=plugin, 
cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2, rerun=True) # checking the results - results2 = nn2.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] # both workflows should be run - assert all([dir.exists() for dir in nn.output_dir]) - assert all([dir.exists() for dir in nn2.output_dir]) + assert num_python_cache_dirs(cache_dir) == 2 + assert num_python_cache_dirs(cache_dir2) == 2 def test_task_state_cachelocations_updated(plugin, tmp_path): @@ -1535,25 +1404,25 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): cache_dir2 = tmp_path / "test_task_nostate2" cache_dir2.mkdir() - nn = FunAddTwo(cache_dir=cache_dir).split("a", a=[3, 5]) - with Submitter(worker=plugin) as sub: + nn = FunAddTwo().split("a", a=[3, 5]) + with Submitter(worker=plugin, cache_dir=cache_dir) as sub: sub(nn) - nn2 = FunAddTwo(cache_dir=cache_dir2, cache_locations=cache_dir).split( - "a", a=[3, 5] - ) - with Submitter(worker=plugin) as sub: - sub(nn2, cache_locations=cache_dir1) + nn2 = FunAddTwo().split("a", a=[3, 5]) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(nn2) # checking the results - results2 = nn2.result() + expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] for i, res in enumerate(expected): - assert results2[i].output.out == res[1] + assert results2.outputs.out[i] == res[1] # both workflows should be run - assert all([dir.exists() for dir in nn.output_dir]) - assert all([dir.exists() for dir in nn2.output_dir]) + assert num_python_cache_dirs(cache_dir) == 2 + assert num_python_cache_dirs(cache_dir2) == 2 def test_task_files_cachelocations(plugin_dask_opt, tmp_path): @@ -1573,21 +1442,22 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): input2 = input_dir / "input2.txt" input2.write_text("test") - nn = FunFile(filename=input1, cache_dir=cache_dir) - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn) + nn = FunFile(filename=input1) + with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: + results = sub(nn) - nn2 = FunFile(filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir) - with Submitter(worker=plugin_dask_opt) as sub: - sub(nn2) + nn2 = FunFile(filename=input2) + with Submitter( + worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir + ) as sub: + results2 = sub(nn2) # checking the results - results2 = nn2.result() - assert results2.output.out == "test" + + assert results2.outputs.out == "test" # checking if the second task didn't run the interface again - assert nn.output_dir.exists() - assert not nn2.output_dir.exists() + assert results.output_dir == results2.output_dir class OverriddenContentsFile(File): @@ -1610,10 +1480,10 @@ def byte_chunks(self, **kwargs) -> ty.Generator[ty.Tuple[str, bytes], None, None yield from super().byte_chunks(**kwargs) @property - def contents(self): + def raw_contents(self): if self._contents is not None: return self._contents - return super().contents + return super().raw_contents def test_task_files_persistentcache(tmp_path): @@ -1629,21 +1499,14 @@ def test_task_files_persistentcache(tmp_path): @python.define def read_contents(x: OverriddenContentsFile) -> bytes: - return x.contents + return x.raw_contents - assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"foo" - ) + assert 
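The persistent-cache assertions that follow only hold if the file hash is keyed on the file's modification time: while the mtime is unchanged the previously stored hash (and hence the first task result) is reused even though the in-memory contents were overridden, and touching the file is enough to invalidate it, as the two-second-resolution comment hints. A sketch of such a key; combining path, size and st_mtime_ns is an assumption about the scheme, not the actual implementation:

    import hashlib
    from pathlib import Path

    def persistent_hash_key(path: Path) -> str:
        # unchanged mtime -> same key -> reuse the previously computed content hash
        # touching the file bumps st_mtime_ns, so the contents get re-hashed
        stat = path.stat()
        return hashlib.sha256(f"{path}:{stat.st_size}:{stat.st_mtime_ns}".encode()).hexdigest()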
read_contents(x=test_file)(cache_dir=cache_dir).out == b"foo" test_file._contents = b"bar" # should return result from the first run using the persistent cache - assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"foo" - ) + assert read_contents(x=test_file)(cache_dir=cache_dir).out == b"foo" time.sleep(2) # Windows has a 2-second resolution for mtime test_file_path.touch() # update the mtime to invalidate the persistent cache value assert ( - read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out - == b"bar" + read_contents(x=test_file)(cache_dir=cache_dir).out == b"bar" ) # returns the overridden value From 9c7d57e4117df0ef7261ca7a755e4e2ecfbc20c1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 28 Feb 2025 11:28:46 +1100 Subject: [PATCH 290/342] debugging splitting and combining --- pydra/design/base.py | 23 +++++------- pydra/design/shell.py | 5 +++ pydra/design/tests/test_shell.py | 1 - pydra/engine/core.py | 2 +- pydra/engine/lazy.py | 11 +++--- pydra/engine/node.py | 4 +-- pydra/engine/specs.py | 2 +- pydra/engine/state.py | 21 +++++++---- pydra/engine/submitter.py | 20 +++++++++-- pydra/engine/tests/test_node_task.py | 48 ++++++++++++++++++++++---- pydra/engine/tests/test_tasks_files.py | 23 ------------ 11 files changed, 96 insertions(+), 64 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 22ab9ebedb..6b959c8ea5 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -416,11 +416,6 @@ def make_task_def( spec_type._check_arg_refs(inputs, outputs) - for inpt in inputs.values(): - set_none_default_if_optional(inpt) - for outpt in inputs.values(): - set_none_default_if_optional(outpt) - if name is None and klass is not None: name = klass.__name__ if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: @@ -459,10 +454,10 @@ def make_task_def( if getattr(arg, "path_template", False): if is_optional(arg.type): field_type = Path | bool | None - # Will default to None and not be inserted into the command + attrs_kwargs = {"default": None} else: field_type = Path | bool - attrs_kwargs = {"default": True} + attrs_kwargs = {"default": True} # use the template by default elif is_optional(arg.type): field_type = Path | None else: @@ -988,12 +983,10 @@ def check_explicit_fields_are_none(klass, inputs, outputs): def _get_attrs_kwargs(field: Field) -> dict[str, ty.Any]: kwargs = {} - if not hasattr(field, "default"): - kwargs["factory"] = nothing_factory - elif field.default is not NO_DEFAULT: + if field.default is not NO_DEFAULT: kwargs["default"] = field.default - elif is_optional(field.type): - kwargs["default"] = None + # elif is_optional(field.type): + # kwargs["default"] = None else: kwargs["factory"] = nothing_factory if field.hash_eq: @@ -1005,9 +998,9 @@ def nothing_factory(): return attrs.NOTHING -def set_none_default_if_optional(field: Field) -> None: - if is_optional(field.type) and field.default is NO_DEFAULT: - field.default = None +# def set_none_default_if_optional(field: Field) -> None: +# if is_optional(field.type) and field.default is NO_DEFAULT: +# field.default = None white_space_re = re.compile(r"\s+") diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 1f0e75543c..7d72050a8e 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -386,6 +386,7 @@ def make( input_helps=input_helps, output_helps=output_helps, ) + if name: class_name = name else: @@ -679,6 +680,10 @@ def from_type_str(type_str) -> type: if ext_type.ext is not None: 
path_template = name + ext_type.ext kwds["path_template"] = path_template + # Set the default value to None if the field is optional and no default is + # provided + if is_optional(type_) and "default" not in kwds: + kwds["default"] = None if option is None: add_arg(name, field_type, kwds) else: diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 184c8e05e2..4c2c2f91bf 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -434,7 +434,6 @@ def test_interface_template_with_type_overrides(): name="int_arg", argstr="--int-arg", type=int | None, - default=None, position=5, ), shell.arg( diff --git a/pydra/engine/core.py b/pydra/engine/core.py index d2c1ce8938..e68f4959f4 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -709,7 +709,7 @@ def construct( f"{len(output_lazy_fields)} ({output_lazy_fields})" ) for outpt, outpt_lf in zip(output_fields, output_lazy_fields): - # Automatically combine any uncombined state arrays into lists + # Automatically combine any uncombined state arrays into a single lists if TypeParser.get_origin(outpt_lf._type) is StateArray: outpt_lf._type = list[TypeParser.strip_splits(outpt_lf._type)[0]] setattr(outputs, outpt.name, outpt_lf) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index fd1f628a24..e2e52908d8 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -152,19 +152,18 @@ def _get_value( value : Any the resolved value of the lazy-field """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel from pydra.engine.state import StateIndex if state_index is None: state_index = StateIndex() - task = graph.node(self._node.name).task(state_index) - _, split_depth = TypeParser.strip_splits(self._type) + node_exec = graph.node(self._node.name) + task = node_exec.task(state_index) + split_depth = node_exec.node.state.depth() if node_exec.node.state else 0 def get_nested(task: "Task[DefType]", depth: int): - if isinstance(task, StateArray): + if depth: + assert isinstance(task, StateArray) val = [get_nested(task=t, depth=depth - 1) for t in task] if depth: val = StateArray[self._type](val) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 8fd3bf0415..f3639e0f81 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -134,7 +134,7 @@ def lzout(self) -> OutputType: type_, _ = TypeParser.strip_splits(outpt._type) if self._state.combiner: type_ = list[type_] - for _ in range(self._state.depth - int(bool(self._state.combiner))): + for _ in range(self._state.depth()): type_ = StateArray[type_] outpt._type = type_ # Flag the output lazy fields as being not typed checked (i.e. 
assigned to @@ -272,7 +272,7 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: if ( isinstance(val, lazy.LazyOutField) and val._node.state - and val._node.state.depth + and val._node.state.depth() ): node: Node = val._node # variables that are part of inner splitters should be treated as a containers diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index fba330841c..2508ea0a80 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -231,7 +231,6 @@ def __call__( cache_locations=cache_locations, messenger_args=messenger_args, messengers=messengers, - rerun=rerun, environment=environment, worker=worker, **kwargs, @@ -239,6 +238,7 @@ def __call__( result = sub( self, hooks=hooks, + rerun=rerun, ) except TypeError as e: # Catch any inadvertent passing of task definition parameters to the diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 11a4290bcf..d078c1065f 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -231,29 +231,38 @@ def names(self): names.append(token) return names - @property - def depth(self) -> int: + def depth(self, after_combine: bool = True) -> int: """Return the number of splits of the state, i.e. the number nested state arrays to wrap around the type of lazy out fields + Parameters + ---------- + after_combine : :obj:`bool` + if True, the depth is after combining the fields, otherwise it is before + any combinations + Returns ------- int - number of uncombined independent splits (i.e. linked splits only add 1) + number of splits in the state (i.e. linked splits only add 1) """ depth = 0 stack = [] + + def included(s): + return s not in self.combiner if after_combine else True + for spl in self.splitter_rpn: if spl in [".", "*"]: if spl == ".": - depth += int(all(s not in self.combiner for s in stack)) + depth += int(all(included(s) for s in stack)) else: assert spl == "*" - depth += len([s for s in stack if s not in self.combiner]) + depth += len([s for s in stack if included(s)]) stack = [] else: stack.append(spl) - remaining_stack = [s for s in stack if s not in self.combiner] + remaining_stack = [s for s in stack if included(s)] return depth + len(remaining_stack) @property diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 711322552d..364f9c3217 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -26,6 +26,7 @@ from pydra.utils.messenger import AuditFlag, Messenger from pydra.utils import default_run_cache_dir from pydra.design import workflow +from .state import State import logging logger = logging.getLogger("pydra.submitter") @@ -35,7 +36,7 @@ from .specs import WorkflowDef, TaskDef, TaskOutputs, TaskHooks, Result from .core import Workflow from .environments import Environment - from .state import State + DefType = ty.TypeVar("DefType", bound="TaskDef") OutputType = ty.TypeVar("OutputType", bound="TaskOutputs") @@ -209,7 +210,22 @@ def __call__( if task_def._splitter: from pydra.engine.specs import TaskDef - output_types = {o.name: list[o.type] for o in list_fields(task_def.Outputs)} + state = State( + name="not-important", + definition=task_def, + splitter=task_def._splitter, + combiner=task_def._combiner, + ) + list_depth = 2 if state.depth(after_combine=False) != state.depth() else 1 + + def wrap_type(tp): + for _ in range(list_depth): + tp = list[tp] + return tp + + output_types = { + o.name: wrap_type(o.type) for o in list_fields(task_def.Outputs) + } @workflow.define(outputs=output_types) def Split( diff --git 
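The state.py and submitter.py hunks above work together: State.depth() now reports how many levels of nesting remain, optionally before the combiner is applied, and the submitter wraps each output type in one or two list levels depending on whether combining changes that depth. A self-contained sketch of the same RPN walk and of the wrapping decision; split_depth is an illustrative stand-in for the State method, not part of the API:

    def split_depth(splitter_rpn: list[str], combiner: list[str], after_combine: bool = True) -> int:
        # mirrors State.depth(): "." merges its operands into one split, "*" keeps them
        # independent, and combined fields are excluded when after_combine is True
        def included(s: str) -> bool:
            return s not in combiner if after_combine else True

        depth, stack = 0, []
        for token in splitter_rpn:
            if token == ".":
                depth += int(all(included(s) for s in stack))
                stack = []
            elif token == "*":
                depth += len([s for s in stack if included(s)])
                stack = []
            else:
                stack.append(token)
        return depth + len([s for s in stack if included(s)])

    # outer splitter over two fields, then combined over one of them
    rpn, comb = ["NA.a", "NA.b", "*"], ["NA.a"]
    assert split_depth(rpn, comb, after_combine=False) == 2
    assert split_depth(rpn, comb) == 1
    # the submitter wraps outputs twice when combining collapses part of the split, otherwise once
    list_depth = 2 if split_depth(rpn, comb, after_combine=False) != split_depth(rpn, comb) else 1
    assert list_depth == 2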
a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 49dff68b8a..1abfc1d96c 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -371,6 +371,7 @@ def test_task_nostate_1(plugin_dask_opt, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results assert results.outputs.out == 5 @@ -384,6 +385,7 @@ def test_task_nostate_1_call(tmp_path): nn = FunAddTwo(a=3) with Submitter(cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results assert results.outputs.out == 5 @@ -402,6 +404,7 @@ def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -421,6 +424,7 @@ def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): with Submitter(cache_dir=tmp_path, worker=plugin_dask_opt) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -440,6 +444,7 @@ def test_task_nostate_2(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -456,6 +461,7 @@ def test_task_nostate_3(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -474,6 +480,7 @@ def test_task_nostate_4(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -513,9 +520,9 @@ def test_task_nostate_6a_exception(): """checking if the function gets the attrs.Nothing value""" nn = FunAddVarNone(a=2) assert nn.b is attrs.NOTHING - with pytest.raises(TypeError) as excinfo: + with pytest.raises(ValueError) as excinfo: nn() - assert "unsupported" in str(excinfo.value) + assert "Mandatory field 'b' is not set" in str(excinfo.value) def test_task_nostate_7(): @@ -541,6 +548,7 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -561,6 +569,7 @@ def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -583,12 +592,14 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): nn = FunAddTwo(a=3) with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) nn2 = FunAddTwo(a=3) with Submitter( worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir ) as sub: results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results @@ -612,6 +623,7 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): nn = FunAddTwo(a=3) with 
Submitter(worker=plugin, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) nn2 = FunAddTwo(a=3) with Submitter( @@ -696,18 +708,21 @@ def test_task_nostate_cachelocations_updated(plugin, tmp_path): nn = FunAddTwo(a=3) with Submitter(worker=plugin, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) nn2 = FunAddTwo(a=3) with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir ) as sub: results1 = sub(nn2) + assert not results1.errored, "\n".join(results.errors["error message"]) # updating cache location to non-existing dir with Submitter( worker=plugin, cache_locations=cache_dir1, cache_dir=tmp_path ) as sub: results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results @@ -738,6 +753,7 @@ def test_task_state_1(plugin_dask_opt, input_type, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -760,6 +776,7 @@ def test_task_state_1a(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -784,6 +801,7 @@ def test_task_state_singl_1(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results expected = [({"NA.a": 3, "NA.b": 10}, 13), ({"NA.a": 5, "NA.b": 10}, 15)] @@ -852,6 +870,7 @@ def test_task_state_2( with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -870,6 +889,7 @@ def test_task_state_3(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -893,6 +913,7 @@ def test_task_state_4(plugin, input_type, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking that split is done across dim 0 el_0 = state.states_val[0]["NA.lst"] @@ -918,6 +939,7 @@ def test_task_state_4a(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -936,6 +958,7 @@ def test_task_state_5(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -969,6 +992,7 @@ def test_task_state_6(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -987,6 +1011,7 @@ def test_task_state_6a(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -1010,6 +1035,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: results = sub(nn) + 
assert not results.errored, "\n".join(results.errors["error message"]) assert state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] assert state.states_val == [{"NA.a": 3}, {"NA.a": 5}] @@ -1123,6 +1149,7 @@ def test_task_state_comb_2( with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) assert state.splitter_final == state_splitter_final assert state.splitter_rpn_final == state_rpn_final @@ -1165,6 +1192,7 @@ def test_task_state_comb_singl_1(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) assert results.outputs.out == [13, 15] @@ -1180,6 +1208,7 @@ def test_task_state_comb_3(plugin, tmp_path): with Submitter(worker=plugin, cache_dir=tmp_path) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -1188,7 +1217,7 @@ def test_task_state_comb_3(plugin, tmp_path): assert results.outputs.out[i] == res[1] -def test_task_state_comb_order(): +def test_task_state_comb_order(tmp_path): """tasks with an outer splitter and various combiner; showing the order of results """ @@ -1198,7 +1227,7 @@ def test_task_state_comb_order(): state_a = get_state(nn_a) assert state_a.combiner == ["NA.a"] - outputs = nn_a() + outputs = nn_a(cache_dir=tmp_path / "cache") # combined_results_a = [[res.output.out for res in res_l] for res_l in results_a] assert outputs.out == [[13, 23], [15, 25]] @@ -1207,7 +1236,7 @@ def test_task_state_comb_order(): state_b = get_state(nn_b) assert state_b.combiner == ["NA.b"] - outputs_b = nn_b() + outputs_b = nn_b(cache_dir=tmp_path / "cache_b") # combined_results_b = [[res.output.out for res in res_l] for res_l in results_b] assert outputs_b.out == [[13, 15], [23, 25]] @@ -1218,7 +1247,7 @@ def test_task_state_comb_order(): state_ab = get_state(nn_ab) assert state_ab.combiner == ["NA.a", "NA.b"] - outputs_ab = nn_ab() + outputs_ab = nn_ab(cache_dir=tmp_path / "cache_ab") assert outputs_ab.out == [13, 15, 23, 25] # combiner with both fields ["b", "a"] - will create the same list as nn_ab @@ -1229,7 +1258,7 @@ def test_task_state_comb_order(): state_ba = get_state(nn_ba) assert state_ba.combiner == ["NA.b", "NA.a"] - outputs_ba = nn_ba() + outputs_ba = nn_ba(cache_dir=tmp_path / "cache_ba") assert outputs_ba.out == [13, 15, 23, 25] @@ -1320,6 +1349,7 @@ def test_task_state_cachedir(plugin_dask_opt, tmp_path): with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, "\n".join(results.errors["error message"]) # checking the results @@ -1347,6 +1377,7 @@ def test_task_state_cachelocations(plugin, tmp_path): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir ) as sub: results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results expected = [({"NA.a": 3}, 5), ({"NA.a": 5}, 7)] @@ -1413,6 +1444,7 @@ def test_task_state_cachelocations_updated(plugin, tmp_path): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results @@ -1445,12 +1477,14 @@ def test_task_files_cachelocations(plugin_dask_opt, tmp_path): nn = FunFile(filename=input1) with Submitter(worker=plugin_dask_opt, cache_dir=cache_dir) as sub: results = sub(nn) + assert not results.errored, 
"\n".join(results.errors["error message"]) nn2 = FunFile(filename=input2) with Submitter( worker=plugin_dask_opt, cache_dir=cache_dir2, cache_locations=cache_dir ) as sub: results2 = sub(nn2) + assert not results2.errored, "\n".join(results.errors["error message"]) # checking the results diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index 32d35cef07..96a4f940a9 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -191,26 +191,3 @@ def test_broken_dir_link1(tmpdir): with pytest.raises(FileNotFoundError): DirCountFileAnnot(dirpath=Path(dir1)) - - -def test_broken_dir_link2(tmpdir): - # valid dirs with broken symlink(s) are hashed - dir2 = tmpdir.join("dir2") - os.mkdir(dir2) - file1 = dir2.join("file1") - file2 = dir2.join("file2") - file1.open("w+").close() - file2.open("w+").close() - - file1_link = dir2.join("file1_link") - os.symlink(file1, file1_link) - os.remove(file1) # file1_link is broken - - nn = DirCountFile(dirpath=dir2) - # does not raises error because pydra treats dirpath as a string - with Submitter(worker="cf") as sub: - sub(nn) - - nn2 = DirCountFileAnnot(dirpath=str(dir2)) - with Submitter(worker="cf") as sub: - sub(nn2) From e3fe8e01f8ff992362e741a0257749022ce60c29 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 28 Feb 2025 11:29:28 +1100 Subject: [PATCH 291/342] reverted change to LazyField._get_value --- pydra/engine/lazy.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index e2e52908d8..fd1f628a24 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -152,18 +152,19 @@ def _get_value( value : Any the resolved value of the lazy-field """ + from pydra.utils.typing import ( + TypeParser, + ) # pylint: disable=import-outside-toplevel from pydra.engine.state import StateIndex if state_index is None: state_index = StateIndex() - node_exec = graph.node(self._node.name) - task = node_exec.task(state_index) - split_depth = node_exec.node.state.depth() if node_exec.node.state else 0 + task = graph.node(self._node.name).task(state_index) + _, split_depth = TypeParser.strip_splits(self._type) def get_nested(task: "Task[DefType]", depth: int): - if depth: - assert isinstance(task, StateArray) + if isinstance(task, StateArray): val = [get_nested(task=t, depth=depth - 1) for t in task] if depth: val = StateArray[self._type](val) From 2e3f5edce24dddfb762e0ce1e602943dfd14e8c5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 28 Feb 2025 12:35:55 +1100 Subject: [PATCH 292/342] reworking test_shelltask_inputspec --- .../engine/tests/test_shelltask_inputspec.py | 2431 +++++++---------- 1 file changed, 1008 insertions(+), 1423 deletions(-) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 272231715b..0c3c6ea879 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -9,49 +9,51 @@ def test_shell_cmd_execargs_1(): # separate command into exec + args - shelly = ShellDef(executable="executable", args="arg") + shelly = shell.define(["executable", "arg"]) assert shelly.cmdline == "executable arg" assert shelly.name == "ShellTask_noname" def test_shell_cmd_execargs_2(): # separate command into exec + args - shelly = ShellDef(executable=["cmd_1", "cmd_2"], args="arg") + shelly = shell.define(["cmd_1", "cmd_2", "arg"]) assert shelly.cmdline == "cmd_1 cmd_2 arg" def test_shell_cmd_inputs_1(): 
"""additional input with provided position""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inp1", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + inpA: str = shell.arg(position=1, help="inp1", argstr="") shelly = ShellDef( - executable="executable", args="arg", inpA="inp1", input_spec=my_input_spec + executable="executable", + additional_args=["arg"], + inpA="inp1", ) assert shelly.cmdline == "executable inp1 arg" def test_shell_cmd_inputs_1a(): """additional input without provided position""" - my_input_spec = SpecInfo( - name="Input", - fields=[("inpA", attr.ib(type=str, metadata={"help": "inpA", "argstr": ""}))], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + inpA: str = shell.arg(help="inpA", argstr="") shelly = ShellDef( - executable="executable", args="arg", inpA="inpNone1", input_spec=my_input_spec + executable="executable", + additional_args=["arg"], + inpA="inpNone1", ) # inp1 should be the first one after executable assert shelly.cmdline == "executable inpNone1 arg" @@ -59,23 +61,21 @@ def test_shell_cmd_inputs_1a(): def test_shell_cmd_inputs_1b(): """additional input with negative position""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": -1, "help": "inpA", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=-1, help="inpA", argstr="") # separate command into exec + args shelly = ShellDef( - executable="executable", args="arg", inpA="inp-1", input_spec=my_input_spec + executable="executable", + additional_args=["arg"], + inpA="inp-1", ) # inp1 should be last before arg assert shelly.cmdline == "executable inp-1 arg" @@ -83,25 +83,20 @@ def test_shell_cmd_inputs_1b(): def test_shell_cmd_inputs_1_st(): """additional input with provided position, checking cmdline when splitter""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inp1", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inp1", argstr="") ShellDef( name="shelly", executable="executable", - args="arg", - input_spec=my_input_spec, + additional_args=["arg"], ).split("inpA", inpA=["inp1", "inp2"]) # cmdline should be a list # assert shelly.cmdline[0] == "executable inp1 arg" @@ -110,51 +105,44 @@ def test_shell_cmd_inputs_1_st(): def test_shell_cmd_inputs_2(): """additional inputs with provided positions""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 2, "help": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpN", "argstr": ""}, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=2, help="inpA", argstr="") + inpB: str = 
shell.arg(position=1, help="inpN", argstr="") # separate command into exec + args shelly = ShellDef( - executable="executable", inpB="inp1", inpA="inp2", input_spec=my_input_spec + executable="executable", + inpB="inp1", + inpA="inp2", ) assert shelly.cmdline == "executable inp1 inp2" def test_shell_cmd_inputs_2a(): """additional inputs without provided positions""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ("inpA", attr.ib(type=str, metadata={"help": "inpA", "argstr": ""})), - ("inpB", attr.ib(type=str, metadata={"help": "inpB", "argstr": ""})), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(help="inpA", argstr="") + inpB: str = shell.arg(help="inpB", argstr="") # separate command into exec + args shelly = ShellDef( executable="executable", inpA="inpNone1", inpB="inpNone2", - input_spec=my_input_spec, ) # position taken from the order in input definition assert shelly.cmdline == "executable inpNone1 inpNone2" @@ -162,29 +150,21 @@ def test_shell_cmd_inputs_2a(): def test_shell_cmd_inputs_2_err(): """additional inputs with provided positions (exception due to the duplication)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpB", "argstr": ""}, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", argstr="") + inpB: str = shell.arg(position=1, help="inpB", argstr="") shelly = ShellDef( - executable="executable", inpA="inp1", inpB="inp2", input_spec=my_input_spec + executable="executable", + inpA="inp1", + inpB="inp2", ) with pytest.raises(Exception) as e: shelly.cmdline @@ -195,54 +175,37 @@ def test_shell_cmd_inputs_2_noerr(): """additional inputs with provided positions (duplication of the position doesn't lead to error, since only one field has value) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpB", "argstr": ""}, - ), - ), - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA="inp1", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", argstr="") + inpB: str = shell.arg(position=1, help="inpB", argstr="") + + shelly = ShellDef( + executable="executable", + inpA="inp1", + ) shelly.cmdline def test_shell_cmd_inputs_3(): """additional inputs: positive pos, negative pos and no pos""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpA", "argstr": ""}, - ), - ), - ( - "inpB", - attr.ib( - type=str, - metadata={"position": -1, "help": "inpB", "argstr": ""}, - ), - ), - ("inpC", attr.ib(type=str, metadata={"help": "inpC", "argstr": ""})), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", 
argstr="") + inpB: str = shell.arg(position=-1, help="inpB", argstr="") + inpC: str = shell.arg(help="inpC", argstr="") # separate command into exec + args shelly = ShellDef( @@ -250,7 +213,6 @@ def test_shell_cmd_inputs_3(): inpA="inp1", inpB="inp-1", inpC="inpNone", - input_spec=my_input_spec, ) # input without position should be between positive an negative positions assert shelly.cmdline == "executable inp1 inpNone inp-1" @@ -258,44 +220,41 @@ def test_shell_cmd_inputs_3(): def test_shell_cmd_inputs_argstr_1(): """additional string inputs with argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA="inp1", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg(position=1, help="inpA", argstr="-v") + + shelly = ShellDef( + executable="executable", + inpA="inp1", + ) # flag used before inp1 assert shelly.cmdline == "executable -v inp1" def test_shell_cmd_inputs_argstr_2(): """additional bool inputs with argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=bool, - metadata={"position": 1, "help": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: bool = shell.arg(position=1, help="inpA", argstr="-v") # separate command into exec + args shelly = ShellDef( - executable="executable", args="arg", inpA=True, input_spec=my_input_spec + executable="executable", + additional_args=["arg"], + inpA=True, ) # a flag is used without any additional argument assert shelly.cmdline == "executable -v arg" @@ -303,22 +262,19 @@ def test_shell_cmd_inputs_argstr_2(): def test_shell_cmd_inputs_list_1(): """providing list as an additional input, no sep, no argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help": "inpA", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="") shelly = ShellDef( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec + executable="executable", + inpA=["el_1", "el_2", "el_3"], ) # multiple elements assert shelly.cmdline == "executable el_1 el_2 el_3" @@ -326,44 +282,38 @@ def test_shell_cmd_inputs_list_1(): def test_shell_cmd_inputs_list_2(): """providing list as an additional input, no sep, but argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help": "inpA", "argstr": "-v"}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v") shelly = ShellDef( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec + executable="executable", + inpA=["el_1", "el_2", "el_3"], ) assert shelly.cmdline == "executable -v el_1 el_2 el_3" def test_shell_cmd_inputs_list_3(): """providing 
list as an additional input, no sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=ty.List[str], - metadata={"position": 2, "help": "inpA", "argstr": "-v..."}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v...") shelly = ShellDef( - executable="executable", inpA=["el_1", "el_2", "el_3"], input_spec=my_input_spec + executable="executable", + inpA=["el_1", "el_2", "el_3"], ) # a flag is repeated assert shelly.cmdline == "executable -v el_1 -v el_2 -v el_3" @@ -371,29 +321,24 @@ def test_shell_cmd_inputs_list_3(): def test_shell_cmd_inputs_list_sep_1(): """providing list as an additional input:, sep, no argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="", + ) shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, ) # separated by commas assert shelly.cmdline == "executable aaa,bbb,ccc" @@ -401,29 +346,24 @@ def test_shell_cmd_inputs_list_sep_1(): def test_shell_cmd_inputs_list_sep_2(): """providing list as an additional input:, sep, and argstr""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v", + ) shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, ) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -431,29 +371,24 @@ def test_shell_cmd_inputs_list_sep_2(): def test_shell_cmd_inputs_list_sep_2a(): """providing list as an additional input:, sep, and argstr with f-string""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v {inpA}", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v {inpA}", + ) shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, ) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -461,29 +396,24 @@ def test_shell_cmd_inputs_list_sep_2a(): def test_shell_cmd_inputs_list_sep_3(): """providing list as an additional input:, sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + 
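Taken together, the sep tests pin down two rendering modes for list inputs: a plain argstr is emitted once in front of the sep-joined values, while an argstr ending in "..." is repeated for every element, with the separator plus a space between the repeated units. A throwaway sketch of just that string assembly, matching the cmdlines asserted in these tests; render_list is not the real formatter, only the observable behaviour is copied from the asserts:

    def render_list(argstr: str, values: list[str], sep: str) -> str:
        if argstr.endswith("..."):
            # repeat the flag per element, join the units with sep + space
            flag = argstr[:-3].strip()
            return (sep + " ").join(f"{flag} {v}".strip() for v in values)
        # otherwise the flag (if any) appears once before the sep-joined values
        joined = sep.join(values)
        return f"{argstr} {joined}".strip() if argstr else joined

    assert render_list("", ["aaa", "bbb", "ccc"], ",") == "aaa,bbb,ccc"
    assert render_list("-v", ["aaa", "bbb", "ccc"], ",") == "-v aaa,bbb,ccc"
    assert render_list("-v...", ["aaa", "bbb", "ccc"], ",") == "-v aaa, -v bbb, -v ccc"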
@shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v...", + ) shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, ) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -491,29 +421,24 @@ def test_shell_cmd_inputs_list_sep_3(): def test_shell_cmd_inputs_list_sep_3a(): """providing list as an additional input:, sep, argstr with ... and f-string""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v {inpA}...", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v {inpA}...", + ) shelly = ShellDef( executable="executable", inpA=["aaa", "bbb", "ccc"], - input_spec=my_input_spec, ) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -521,195 +446,177 @@ def test_shell_cmd_inputs_list_sep_3a(): def test_shell_cmd_inputs_sep_4(): """providing 1-el list as an additional input:, sep, argstr with ...,""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA=["aaa"], input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v...", + ) + + shelly = ShellDef( + executable="executable", + inpA=["aaa"], + ) assert shelly.cmdline == "executable -v aaa" def test_shell_cmd_inputs_sep_4a(): """providing str instead of list as an additional input:, sep, argstr with ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "sep": ",", - "argstr": "-v...", - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA="aaa", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + sep=",", + argstr="-v...", + ) + + shelly = ShellDef( + executable="executable", + inpA="aaa", + ) assert shelly.cmdline == "executable -v aaa" def test_shell_cmd_inputs_format_1(): """additional inputs with argstr that has string formatting""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "-v {inpA}", - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA="aaa", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="-v {inpA}", + ) + + shelly = ShellDef( + executable="executable", + inpA="aaa", + ) assert shelly.cmdline == 
"executable -v aaa" def test_shell_cmd_inputs_format_2(): """additional inputs with argstr that has string formatting and ...""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=MultiInputObj[str], - metadata={ - "position": 1, - "help": "inpA", - "argstr": "-v {inpA}...", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: MultiInputObj[str] = shell.arg( + position=1, + help="inpA", + argstr="-v {inpA}...", + ) shelly = ShellDef( executable="executable", inpA=["el_1", "el_2"], - input_spec=my_input_spec, ) assert shelly.cmdline == "executable -v el_1 -v el_2" def test_shell_cmd_inputs_format_3(): """adding float formatting for argstr with input field""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=float, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "-v {inpA:.5f}", - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", inpA=0.007, input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: float = shell.arg( + position=1, + help="inpA", + argstr="-v {inpA:.5f}", + ) + + shelly = ShellDef( + executable="executable", + inpA=0.007, + ) assert shelly.cmdline == "executable -v 0.00700" def test_shell_cmd_inputs_mandatory_1(): """additional inputs with mandatory=True""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, + ) + + shelly = ShellDef( + executable="executable", + ) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) def test_shell_cmd_inputs_not_given_1(): - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "arg1", - attr.ib( - type=MultiInputObj, - metadata={ - "argstr": "--arg1", - "help": "Command line argument 1", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + arg1: MultiInputObj = ( + shell.arg( + argstr="--arg1", + help="Command line argument 1", ), - ( - "arg2", - attr.ib( - type=MultiInputObj, - metadata={ - "argstr": "--arg2", - "help": "Command line argument 2", - }, - ), + ) + arg2: MultiInputObj = ( + shell.arg( + argstr="--arg2", + help="Command line argument 2", ), - ( - "arg3", - attr.ib( - type=File, - metadata={ - "argstr": "--arg3", - "help": "Command line argument 3", - }, - ), + ) + arg3: File = ( + shell.arg( + argstr="--arg3", + help="Command line argument 3", ), - ], - bases=(ShellDef,), + ) + + shelly = ShellDef( + name="shelly", + executable="executable", ) - shelly = ShellDef(name="shelly", executable="executable", input_spec=my_input_spec) shelly.definition.arg2 = "argument2" @@ -718,38 +625,32 @@ def test_shell_cmd_inputs_not_given_1(): def test_shell_cmd_inputs_template_1(): """additional inputs, one uses output_file_template (and argstr)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - 
attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -759,36 +660,30 @@ def test_shell_cmd_inputs_template_1(): def test_shell_cmd_inputs_template_1a(): """additional inputs, one uses output_file_template (without argstr)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help": "outA", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + help="outA", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") # outA has no argstr in metadata, so it's not a part of the command line assert shelly.cmdline == "executable inpA" @@ -796,33 +691,27 @@ def test_shell_cmd_inputs_template_1a(): # TODO: after deciding how we use requires/templates def test_shell_cmd_inputs_template_2(): """additional inputs, one uses output_file_template (and argstr, but input not provided)""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpB", - attr.ib( - type=str, - metadata={"position": 1, "help": "inpB", "argstr": ""}, - ), - ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outB", - "argstr": "-o", - "output_file_template": "{inpB}_out", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpB: str = shell.arg(position=1, help="inpB", argstr="") + outB: str = ( + shell.arg( + position=2, + help="outB", + argstr="-o", + output_file_template="{inpB}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec) + shelly = ShellDef( + executable="executable", + ) # inpB not in the inputs, so no outB in the command line assert shelly.cmdline == "executable" # checking if outB in the output fields @@ -837,72 +726,52 @@ def test_shell_cmd_inputs_template_3(tmp_path): inpB = tmp_path / "inpB" Path.touch(inpA) Path.touch(inpB) - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + 
@shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "inpB", - "argstr": "", - "mandatory": True, - }, - ), + ) + inpB: str = ( + shell.arg( + position=2, + help="inpB", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help": "outA", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + help="outA", + output_file_template="{inpA}_out", ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help": "outB", - "output_file_template": "{inpB}_out", - }, - ), + ) + outB: str = ( + shell.arg( + help="outB", + output_file_template="{inpB}_out", ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), + ) + outAB: str = ( + shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA=inpA, inpB=inpB - ) + shelly = ShellDef(executable="executable", inpA=inpA, inpB=inpB) # using syntax from the outAB field assert ( shelly.cmdline @@ -917,72 +786,52 @@ def test_shell_cmd_inputs_template_3a(): read-only fields that combine two outputs together in the command line testing a different order within the input definition """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpB", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "inpB", - "argstr": "", - "mandatory": True, - }, - ), + ) + inpB: str = ( + shell.arg( + position=2, + help="inpB", + argstr="", + mandatory=True, ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), + ) + outAB: str = ( + shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help": "outA", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + help="outA", + output_file_template="{inpA}_out", ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help": "outB", - "output_file_template": "{inpB}_out", - }, - ), + ) + outB: str = ( + shell.arg( + help="outB", + output_file_template="{inpB}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", inpB="inpB" - ) + shelly = ShellDef(executable="executable", inpA="inpA", inpB="inpB") # using syntax from the outAB field assert ( shelly.cmdline @@ -998,65 +847,45 @@ def test_shell_cmd_inputs_template_4(): read-only fields that combine two outputs together in the command line one output_file_template can't be resolved - no inpB is provided """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), - ), - 
( - "inpB", - attr.ib( - type=str, - metadata={"position": 2, "help": "inpB", "argstr": ""}, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help": "outAB", - "argstr": "-o {outA} {outB}", - "readonly": True, - }, - ), + ) + inpB: str = shell.arg(position=2, help="inpB", argstr="") + outAB: str = ( + shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "help": "outA", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + help="outA", + output_file_template="{inpA}_out", ), - ( - "outB", - attr.ib( - type=str, - metadata={ - "help": "outB", - "output_file_template": "{inpB}_out", - }, - ), + ) + outB: str = ( + shell.arg( + help="outB", + output_file_template="{inpB}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") # inpB is not provided so outB not in the command line assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1064,26 +893,22 @@ def test_shell_cmd_inputs_template_4(): def test_shell_cmd_inputs_template_5_ex(): """checking if the exception is raised for read-only fields when input is set""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "outAB", - attr.ib( - type=str, - metadata={ - "position": -1, - "help": "outAB", - "argstr": "-o", - "readonly": True, - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, outAB="outAB") + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o", + readonly=True, + ) + + shelly = ShellDef(executable="executable", outAB="outAB") with pytest.raises(Exception) as e: shelly.cmdline assert "read only" in str(e.value) @@ -1095,58 +920,46 @@ def test_shell_cmd_inputs_template_6(): whenever the template can be formatted (the same way as for templates that has type=str) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=ty.Union[str, bool], - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: ty.Union[str, bool] = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) # no input for outA (and no default value), so the output is created whenever the # template can be formatted (the same way as for templates that has type=str) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") assert shelly.cmdline == 
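The str|bool template fields exercised in this group reduce to a three-way switch, plus the field default when nothing is provided at all: with no default the formatted template is used, and with default=False (the variant that follows) the argument is simply dropped. A minimal sketch of that switch, using a stand-in path for shelly.output_dir; resolve_output_arg is a hypothetical helper, not the library's resolution code:

    from pathlib import Path

    def resolve_output_arg(value, template: str, inpA: str, output_dir: Path):
        # True -> fall back to the formatted template inside the output directory,
        # False -> drop the "-o" argument entirely, a string -> use it verbatim
        if value is False:
            return None
        if value is True:
            return output_dir / template.format(inpA=inpA)
        return value

    out_dir = Path("/some/output/dir")  # stand-in for shelly.output_dir
    assert resolve_output_arg(True, "{inpA}_out", "inpA", out_dir) == out_dir / "inpA_out"
    assert resolve_output_arg(False, "{inpA}_out", "inpA", out_dir) is None
    assert resolve_output_arg("outA", "{inpA}_out", "inpA", out_dir) == "outA"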
f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # a string is provided for outA, so this should be used as the outA value - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA="outA") assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA=True) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA=False) assert shelly.cmdline == "executable inpA" @@ -1155,58 +968,46 @@ def test_shell_cmd_inputs_template_6a(): and default is set to False, so if nothing is provided as an input, the output is not used """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=ty.Union[str, bool], - default=False, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: ty.Union[str, bool] = ( + shell.arg( + default=False, + position=2, + help="outA", + argstr="-o", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) # no input for outA, but default is False, so the outA shouldn't be used - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") assert shelly.cmdline == "executable inpA" # a string is provided for outA, so this should be used as the outA value - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA="outA" - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA="outA") assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA=True) assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA="inpA", outA=False - ) + shelly = ShellDef(executable="executable", inpA="inpA", outA=False) assert shelly.cmdline == "executable inpA" @@ -1214,40 +1015,34 @@ def test_shell_cmd_inputs_template_7(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) no keep_extension is used """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + 
executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) + shelly = ShellDef(executable="executable", inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1260,41 +1055,35 @@ def test_shell_cmd_inputs_template_7a(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) keep_extension is True (as default) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "", - "keep_extension": True, - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="", + keep_extension=True, + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) + shelly = ShellDef(executable="executable", inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1307,41 +1096,35 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) keep extension is False (so the extension is removed when creating the output) """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "", - "keep_extension": False, - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="", + keep_extension=False, + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) + shelly = ShellDef(executable="executable", inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( @@ -1352,40 +1135,34 @@ def test_shell_cmd_inputs_template_7b(tmp_path: Path): def test_shell_cmd_inputs_template_8(tmp_path: Path): """additional inputs uses output_file_template with a suffix and an extension""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - 
type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "", - "output_file_template": "{inpA}_out.txt", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="", + output_file_template="{inpA}_out.txt", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "a_file.t" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=inpA_file) + shelly = ShellDef(executable="executable", inpA=inpA_file) # outA should be formatted in a way that inpA extension is removed and the template extension is used assert ( @@ -1398,55 +1175,43 @@ def test_shell_cmd_inputs_template_9(tmp_path: Path): """additional inputs, one uses output_file_template with two fields: one File and one ints - the output should be recreated from the template """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpInt", - attr.ib( - type=int, - metadata={ - "position": 2, - "help": "inp int", - "argstr": "-i", - "mandatory": True, - }, - ), + ) + inpInt: int = ( + shell.arg( + position=2, + help="inp int", + argstr="-i", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpInt}_out.txt", - }, - ), + ) + outA: str = ( + shell.arg( + position=3, + help="outA", + argstr="-o", + output_file_template="{inpA}_{inpInt}_out.txt", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpInt=3 - ) + shelly = ShellDef(executable="executable", inpA=inpA_file, inpInt=3) assert ( shelly.cmdline @@ -1460,55 +1225,43 @@ def test_shell_cmd_inputs_template_9a(tmp_path: Path): """additional inputs, one uses output_file_template with two fields: one file and one string without extension - should be fine """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpStr", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "inp str", - "argstr": "-i", - "mandatory": True, - }, - ), + ) + inpStr: str = ( + shell.arg( + position=2, + help="inp str", + argstr="-i", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpStr}_out.txt", - }, - ), + ) + outA: str = ( + shell.arg( + position=3, + help="outA", + argstr="-o", + 
output_file_template="{inpA}_{inpStr}_out.txt", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellDef( - executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpStr="hola" - ) + shelly = ShellDef(executable="executable", inpA=inpA_file, inpStr="hola") assert ( shelly.cmdline @@ -1522,48 +1275,38 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): """output_file_template with two fields that are both Files, an exception should be raised """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpFile", - attr.ib( - type=File, - metadata={ - "position": 2, - "help": "inp file", - "argstr": "-i", - "mandatory": True, - }, - ), + ) + inpFile: File = ( + shell.arg( + position=2, + help="inp file", + argstr="-i", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpFile}_out.txt", - }, - ), + ) + outA: str = ( + shell.arg( + position=3, + help="outA", + argstr="-o", + output_file_template="{inpA}_{inpFile}_out.txt", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") @@ -1573,7 +1316,6 @@ def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): shelly = ShellDef( executable="executable", - input_spec=my_input_spec, inpA=inpA_file, inpFile=inpFile_file, ) @@ -1586,55 +1328,44 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): """output_file_template with two fields: a file and a string with extension, that should be used as an additional file and the exception should be raised """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: File = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpStr", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "inp str with extension", - "argstr": "-i", - "mandatory": True, - }, - ), + ) + inpStr: str = ( + shell.arg( + position=2, + help="inp str with extension", + argstr="-i", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 3, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_{inpStr}_out.txt", - }, - ), + ) + outA: str = ( + shell.arg( + position=3, + help="outA", + argstr="-o", + output_file_template="{inpA}_{inpStr}_out.txt", ), - ], - bases=(ShellDef,), - ) + ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") shelly = ShellDef( executable="executable", - input_spec=my_input_spec, inpA=inpA_file, inpStr="hola.txt", ) @@ -1646,38 +1377,32 @@ def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): def test_shell_cmd_inputs_template_10(): """output_file_template uses a float field with formatting""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=float, - metadata={ - "position": 1, - "help": "inpA", - "argstr": 
"{inpA:.1f}", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: float = ( + shell.arg( + position=1, + help="inpA", + argstr="{inpA:.1f}", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": "file_{inpA:.1f}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template="file_{inpA:.1f}_out", ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA=3.3456) + shelly = ShellDef(executable="executable", inpA=3.3456) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" @@ -1689,45 +1414,32 @@ def test_shell_cmd_inputs_template_requires_1(): """Given an input definition with a templated output file subject to required fields, ensure the field is set only when all requirements are met.""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in_file", - attr.ib( - type=str, - metadata={ - "help": "input file", - "mandatory": True, - "argstr": "", - }, - ), - ), - ( - "with_tpl", - attr.ib( - type=bool, - metadata={"help": "enable template"}, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + in_file: str = ( + shell.arg( + help="input file", + mandatory=True, + argstr="", ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "help": "output file", - "argstr": "--tpl", - "output_file_template": "tpl.{in_file}", - "requires": {"with_tpl"}, - }, - ), + ) + with_tpl: bool = shell.arg(help="enable template") + out_file: str = ( + shell.arg( + help="output file", + argstr="--tpl", + output_file_template="tpl.{in_file}", + requires={"with_tpl"}, ), - ], - bases=(ShellDef,), - ) + ) # When requirements are not met. - shelly = ShellDef(executable="cmd", input_spec=my_input_spec, in_file="in.file") + shelly = ShellDef(executable="cmd", in_file="in.file") assert "--tpl" not in shelly.cmdline # When requirements are met. 
@@ -1744,38 +1456,31 @@ def test_shell_cmd_inputs_template_function_1(): def template_fun(inputs): return "{inpA}_out" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": template_fun, - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template=template_fun, ), - ], - bases=(ShellDef,), - ) + ) - shelly = ShellDef(executable="executable", input_spec=my_input_spec, inpA="inpA") + shelly = ShellDef(executable="executable", inpA="inpA") assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" @@ -1792,50 +1497,38 @@ def template_fun(inputs): else: return "{inpA}_odd" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "inpB", - attr.ib( - type=int, - metadata={ - "help": "inpB", - "mandatory": True, - }, - ), + ) + inpB: int = ( + shell.arg( + help="inpB", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": template_fun, - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template=template_fun, ), - ], - bases=(ShellDef,), - ) + ) shelly = ShellDef( executable="executable", - input_spec=my_input_spec, inpA="inpA", inpB=1, ) @@ -1847,42 +1540,35 @@ def test_shell_cmd_inputs_template_1_st(): """additional inputs, one uses output_file_template (and argstr) testing cmdline when splitter defined """ - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "inpA", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "inpA", - "argstr": "", - "mandatory": True, - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + inpA: str = ( + shell.arg( + position=1, + help="inpA", + argstr="", + mandatory=True, ), - ( - "outA", - attr.ib( - type=str, - metadata={ - "position": 2, - "help": "outA", - "argstr": "-o", - "output_file_template": "{inpA}_out", - }, - ), + ) + outA: str = ( + shell.arg( + position=2, + help="outA", + argstr="-o", + output_file_template="{inpA}_out", ), - ], - bases=(ShellDef,), - ) + ) inpA = ["inpA_1", "inpA_2"] ShellDef( name="f", executable="executable", - input_spec=my_input_spec, ).split("inpA", inpA=inpA) # cmdline_list = shelly.cmdline @@ -1897,187 +1583,91 @@ def test_shell_cmd_inputs_denoise_image( tmp_path, ): """example from #279""" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "image_dimensionality", - attr.ib( - type=int, - metadata={ - "help": """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass + + executable = "executable" + + image_dimensionality: int = 
shell.arg( + help=""" 2/3/4 This option forces the image to be treated as a specified-dimensional image. If not specified, the program tries to infer the dimensionality from the input image. """, - "allowed_values": [2, 3, 4], - "argstr": "-d", - }, - ), - ), - ( - "inputImageFilename", - attr.ib( - type=File, - metadata={ - "help": "A scalar image is expected as input for noise correction.", - "argstr": "-i", - "mandatory": True, - }, - ), - ), - ( - "noise_model", - attr.ib( - type=str, - metadata={ - "help": """ - Rician/(Gaussian) - Employ a Rician or Gaussian noise model. - """, - "allowed_values": ["Rician", "Gaussian"], - "argstr": "-n", - }, - ), - ), - ( - "maskImageFilename", - attr.ib( - type=str, - metadata={ - "help": "If a mask image is specified, denoising is only performed in the mask region.", - "argstr": "-x", - }, - ), - ), - ( - "shrink_factor", - attr.ib( - type=int, - default=1, - metadata={ - "help": """ - (1)/2/3/... - Running noise correction on large images can be time consuming. - To lessen computation time, the input image can be resampled. - The shrink factor, specified as a single integer, describes this - resampling. Shrink factor = 1 is the default. - """, - "argstr": "-s", - }, - ), - ), - ( - "patch_radius", - attr.ib( - type=int, - default=1, - metadata={ - "help": "Patch radius. Default = 1x1x1", - "argstr": "-p", - }, - ), - ), - ( - "search_radius", - attr.ib( - type=int, - default=2, - metadata={ - "help": "Search radius. Default = 2x2x2.", - "argstr": "-r", - }, - ), - ), - ( - "correctedImage", - attr.ib( - type=str, - metadata={ - "help": """ - The output consists of the noise corrected version of the input image. - Optionally, one can also output the estimated noise image. - """, - "output_file_template": "{inputImageFilename}_out", - }, - ), - ), - ( - "noiseImage", - attr.ib( - type=ty.Union[str, bool], - default=False, - metadata={ - "help": """ - The output consists of the noise corrected version of the input image. - Optionally, one can also output the estimated noise image. - """, - "output_file_template": "{inputImageFilename}_noise", - }, - ), - ), - ( - "output", - attr.ib( - type=str, - metadata={ - "help": "Combined output", - "argstr": "-o [{correctedImage}, {noiseImage}]", - "position": -1, - "readonly": True, - }, - ), - ), - ( - "version", - attr.ib( - type=bool, - default=False, - metadata={ - "help": "Get Version Information.", - "argstr": "--version", - }, - ), - ), - ( - "verbose", - attr.ib( - type=int, - default=0, - metadata={"help": "(0)/1. Verbose output. ", "argstr": "-v"}, - ), - ), - ( - "help_short", - attr.ib( - type=bool, - default=False, - metadata={ - "help": "Print the help menu (short version)", - "argstr": "-h", - }, - ), - ), - ( - "help", - attr.ib( - type=int, - metadata={ - "help": "Print the help menu.", - "argstr": "--help", - }, - ), - ), - ], - bases=(ShellDef,), - ) + allowed_values=[2, 3, 4], + argstr="-d", + ) + inputImageFilename: File = shell.arg( + help="A scalar image is expected as input for noise correction.", + argstr="-i", + mandatory=True, + ) + noise_model: str = shell.arg( + help=""" Rician/(Gaussian) Employ a Rician or Gaussian noise model. """, + allowed_values=["Rician", "Gaussian"], + argstr="-n", + ) + maskImageFilename: str = shell.arg( + help="If a mask image is specified, denoising is only performed in the mask region.", + argstr="-x", + ) + shrink_factor: int = shell.arg( + default=1, + help=""" (1)/2/3/... Running noise correction on large images can be time consuming. 
To lessen computation time, the input image can be resampled. The shrink factor, specified as a single integer, describes this resampling. Shrink factor = 1 is the default. """, + argstr="-s", + ) + patch_radius: int = shell.arg( + default=1, + help="Patch radius. Default = 1x1x1", + argstr="-p", + ) + search_radius: int = shell.arg( + default=2, + help="Search radius. Default = 2x2x2.", + argstr="-r", + ) + correctedImage: str = shell.arg( + help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, + output_file_template="{inputImageFilename}_out", + ) + noiseImage: ty.Union[str, bool] = shell.arg( + default=False, + help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, + output_file_template="{inputImageFilename}_noise", + ) + output: str = shell.arg( + help="Combined output", + argstr="-o [{correctedImage}, {noiseImage}]", + position=-1, + readonly=True, + ) + version: bool = shell.arg( + default=False, + help="Get Version Information.", + argstr="--version", + ) + verbose: int = shell.arg(default=0, help="(0)/1. Verbose output. ", argstr="-v") + help_short: bool = shell.arg( + default=False, + help="Print the help menu (short version)", + argstr="-h", + ) + help: int = shell.arg( + help="Print the help menu.", + argstr="--help", + ) my_input_file = tmp_path / "a_file.ext" my_input_file.write_text("content") # no input provided - shelly = ShellDef(executable="DenoiseImage", input_spec=my_input_spec) + shelly = ShellDef( + executable="DenoiseImage", + ) with pytest.raises(Exception) as e: shelly.cmdline assert "mandatory" in str(e.value) @@ -2086,7 +1676,6 @@ def test_shell_cmd_inputs_denoise_image( shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, ) assert ( shelly.cmdline @@ -2097,7 +1686,6 @@ def test_shell_cmd_inputs_denoise_image( shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, noiseImage=True, ) assert ( @@ -2110,7 +1698,6 @@ def test_shell_cmd_inputs_denoise_image( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, - input_spec=my_input_spec, ) assert ( shelly.cmdline @@ -2129,7 +1716,6 @@ def test_shell_cmd_inputs_denoise_image( shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, image_dimensionality=2, ) assert ( @@ -2142,7 +1728,6 @@ def test_shell_cmd_inputs_denoise_image( shelly = ShellDef( executable="DenoiseImage", inputImageFilename=my_input_file, - input_spec=my_input_spec, image_dimensionality=5, ) assert "value of image_dimensionality" in str(excinfo.value) From 110bfb5f75247f3c725f925dd1987ce8c40d8f01 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 28 Feb 2025 22:48:52 +1100 Subject: [PATCH 293/342] debugging test_shell_inputspec --- pydra/design/base.py | 25 +- pydra/design/shell.py | 33 +- pydra/engine/helpers_file.py | 4 +- pydra/engine/specs.py | 19 +- .../engine/tests/test_shelltask_inputspec.py | 1200 +++++++---------- pydra/utils/tests/utils.py | 2 - pydra/utils/typing.py | 6 + 7 files changed, 515 insertions(+), 774 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 6b959c8ea5..bedbb26dec 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -62,6 +62,13 @@ def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: return 
TypeParser[self_.type](self_.type, label=self_.name)(value) +def allowed_values_converter(value: ty.Iterable[str] | None) -> list[str] | None: + """Ensure the allowed_values field is a list of strings or None""" + if value is None: + return None + return list(value) + + @attrs.define class Requirement: """Define a requirement for a task input field @@ -76,14 +83,19 @@ class Requirement: """ name: str - allowed_values: list[str] = attrs.field(factory=list, converter=list) + allowed_values: list[str] | None = attrs.field( + default=None, converter=allowed_values_converter + ) def satisfied(self, inputs: "TaskDef") -> bool: """Check if the requirement is satisfied by the inputs""" value = getattr(inputs, self.name) - if value is attrs.NOTHING: + field = {f.name: f for f in list_fields(inputs)}[self.name] + if value is attrs.NOTHING or field.type is bool and value is False: return False - return not self.allowed_values or value in self.allowed_values + if self.allowed_values is None: + return True + return value in self.allowed_values @classmethod def parse(cls, value: ty.Any) -> Self: @@ -416,6 +428,13 @@ def make_task_def( spec_type._check_arg_refs(inputs, outputs) + # Check that the field attributes are valid after all fields have been set + # (especially the type) + for inpt in inputs.values(): + attrs.validate(inpt) + for outpt in outputs.values(): + attrs.validate(outpt) + if name is None and klass is not None: name = klass.__name__ if reserved_names := [n for n in inputs if n in spec_type.RESERVED_FIELD_NAMES]: diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 7d72050a8e..ebdee524c9 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -110,14 +110,16 @@ def _sep_default(self): @sep.validator def _validate_sep(self, _, sep): - if self.type is ty.Any: - return - if ty.get_origin(self.type) is MultiInputObj: + if self.type is MultiInputObj: + tp = ty.Any + elif ty.get_origin(self.type) is MultiInputObj: tp = ty.get_args(self.type)[0] else: tp = self.type if is_optional(tp): tp = optional_type(tp) + if tp is ty.Any: + return origin = ty.get_origin(tp) or tp if ( @@ -238,16 +240,21 @@ class outarg(arg, Out): @path_template.validator def _validate_path_template(self, attribute, value): - if value and self.default not in (NO_DEFAULT, True, None): - raise ValueError( - f"path_template ({value!r}) can only be provided when no default " - f"({self.default!r}) is provided" - ) - if value and not (is_fileset_or_union(self.type) or self.type is ty.Any): - raise ValueError( - f"path_template ({value!r}) can only be provided when type is a FileSet, " - f"or union thereof, not {self.type!r}" - ) + if value: + if self.default not in (NO_DEFAULT, True, None): + raise ValueError( + f"path_template ({value!r}) can only be provided when no default " + f"({self.default!r}) is provided" + ) + if not (is_fileset_or_union(self.type) or self.type is ty.Any): + raise ValueError( + f"path_template ({value!r}) can only be provided when type is a FileSet, " + f"or union thereof, not {self.type!r}" + ) + if self.argstr is None: + raise ValueError( + f"path_template ({value!r}) can only be provided when argstr is not None" + ) @keep_extension.validator def _validate_keep_extension(self, attribute, value): diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 5be17047b7..567fa3cc5e 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -135,9 +135,7 @@ def template_update( if isinstance(field, shell.outarg) and field.path_template and 
getattr(definition, field.name) - and all( - getattr(definition, required_field) for required_field in field.requires - ) + and all(req.satisfied(definition) for req in field.requires) ] dict_mod = {} diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 2508ea0a80..24f559632e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -32,7 +32,7 @@ from . import helpers_state as hlpst from . import lazy from pydra.utils.hash import hash_function, Cache -from pydra.utils.typing import StateArray, MultiInputObj +from pydra.utils.typing import StateArray, is_multi_input from pydra.design.base import Field, Arg, Out, RequirementSet, NO_DEFAULT from pydra.design import shell @@ -981,6 +981,7 @@ def cmdline(self) -> str: the current working directory.""" # checking the inputs fields before returning the command line self._check_resolved() + self._check_rules() # Skip the executable, which can be a multi-part command, e.g. 'docker run'. cmd_args = self._command_args() cmdline = cmd_args[0] @@ -1013,7 +1014,7 @@ def _command_args( for field in list_fields(self): name = field.name value = inputs[name] - if value is None: + if value is None or is_multi_input(field.type) and value == []: continue if name == "executable": pos_args.append(self._command_shelltask_executable(field, value)) @@ -1126,7 +1127,7 @@ def _command_pos_args( # if False, nothing is added to the command. if value is True: cmd_add.append(field.argstr) - elif ty.get_origin(tp) is MultiInputObj: + elif is_multi_input(tp): # if the field is MultiInputObj, it is used to create a list of arguments for val in value or []: cmd_add += self._format_arg(field, val) @@ -1147,7 +1148,9 @@ def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: argstr_formatted_l = [] for val in value: argstr_f = argstr_formatting( - field.argstr, self, value_updates={field.name: val} + field.argstr, + self, + value_updates={field.name: val}, ) argstr_formatted_l.append(f" {argstr_f}") cmd_el_str = field.sep.join(argstr_formatted_l) @@ -1218,20 +1221,20 @@ def split_cmd(cmd: str | None): def argstr_formatting( - argstr: str, inputs: dict[str, ty.Any], value_updates: dict[str, ty.Any] = None + argstr: str, inputs: TaskDef[OutputsType], value_updates: dict[str, ty.Any] = None ): """formatting argstr that have form {field_name}, using values from inputs and updating with value_update if provided """ # if there is a value that has to be updated (e.g. single value from a list) # getting all fields that should be formatted, i.e. {field_name}, ... 
+ inputs_dict = attrs_values(inputs) if value_updates: - inputs = copy(inputs) - inputs.update(value_updates) + inputs_dict.update(value_updates) inp_fields = parse_format_string(argstr) val_dict = {} for fld_name in inp_fields: - fld_value = inputs[fld_name] + fld_value = inputs_dict[fld_name] fld_attr = getattr(attrs.fields(type(inputs)), fld_name) if fld_value is None or ( fld_value is False diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 0c3c6ea879..e5d8cf92cc 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1,22 +1,29 @@ import typing as ty from pathlib import Path -import attr +import attrs import pytest from pydra.engine.specs import ShellOutputs, ShellDef +from pydra.engine.helpers import list_fields from fileformats.generic import File from pydra.design import shell +from pydra.utils.typing import MultiInputObj + + +def get_output_names(task): + return sorted(f.name for f in list_fields(task.Outputs)) def test_shell_cmd_execargs_1(): # separate command into exec + args - shelly = shell.define(["executable", "arg"]) + Shelly = shell.define(["executable", "arg"]) + shelly = Shelly() assert shelly.cmdline == "executable arg" - assert shelly.name == "ShellTask_noname" def test_shell_cmd_execargs_2(): # separate command into exec + args - shelly = shell.define(["cmd_1", "cmd_2", "arg"]) + Shelly = shell.define(["cmd_1", "cmd_2", "arg"]) + shelly = Shelly() assert shelly.cmdline == "cmd_1 cmd_2 arg" @@ -31,8 +38,7 @@ class Outputs(ShellOutputs): executable = "executable" inpA: str = shell.arg(position=1, help="inp1", argstr="") - shelly = ShellDef( - executable="executable", + shelly = Shelly( additional_args=["arg"], inpA="inp1", ) @@ -50,8 +56,7 @@ class Outputs(ShellOutputs): executable = "executable" inpA: str = shell.arg(help="inpA", argstr="") - shelly = ShellDef( - executable="executable", + shelly = Shelly( additional_args=["arg"], inpA="inpNone1", ) @@ -68,12 +73,10 @@ class Outputs(ShellOutputs): pass executable = "executable" - inpA: str = shell.arg(position=-1, help="inpA", argstr="") # separate command into exec + args - shelly = ShellDef( - executable="executable", + shelly = Shelly( additional_args=["arg"], inpA="inp-1", ) @@ -81,28 +84,6 @@ class Outputs(ShellOutputs): assert shelly.cmdline == "executable inp-1 arg" -def test_shell_cmd_inputs_1_st(): - """additional input with provided position, checking cmdline when splitter""" - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: str = shell.arg(position=1, help="inp1", argstr="") - - ShellDef( - name="shelly", - executable="executable", - additional_args=["arg"], - ).split("inpA", inpA=["inp1", "inp2"]) - # cmdline should be a list - # assert shelly.cmdline[0] == "executable inp1 arg" - # assert shelly.cmdline[1] == "executable inp2 arg" - - def test_shell_cmd_inputs_2(): """additional inputs with provided positions""" @@ -117,8 +98,7 @@ class Outputs(ShellOutputs): inpB: str = shell.arg(position=1, help="inpN", argstr="") # separate command into exec + args - shelly = ShellDef( - executable="executable", + shelly = Shelly( inpB="inp1", inpA="inp2", ) @@ -139,8 +119,7 @@ class Outputs(ShellOutputs): inpB: str = shell.arg(help="inpB", argstr="") # separate command into exec + args - shelly = ShellDef( - executable="executable", + shelly = Shelly( inpA="inpNone1", inpB="inpNone2", ) @@ -151,46 +130,19 
@@ class Outputs(ShellOutputs): def test_shell_cmd_inputs_2_err(): """additional inputs with provided positions (exception due to the duplication)""" - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: str = shell.arg(position=1, help="inpA", argstr="") - inpB: str = shell.arg(position=1, help="inpB", argstr="") - - shelly = ShellDef( - executable="executable", - inpA="inp1", - inpB="inp2", - ) with pytest.raises(Exception) as e: - shelly.cmdline - assert "1 is already used" in str(e.value) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + class Outputs(ShellOutputs): + pass -def test_shell_cmd_inputs_2_noerr(): - """additional inputs with provided positions - (duplication of the position doesn't lead to error, since only one field has value) - """ + executable = "executable" - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - class Outputs(ShellOutputs): - pass + inpA: str = shell.arg(position=1, help="inpA", argstr="") + inpB: str = shell.arg(position=1, help="inpB", argstr="") - executable = "executable" - - inpA: str = shell.arg(position=1, help="inpA", argstr="") - inpB: str = shell.arg(position=1, help="inpB", argstr="") - - shelly = ShellDef( - executable="executable", - inpA="inp1", - ) - shelly.cmdline + assert "Multiple fields have the overlapping positions" in str(e.value) def test_shell_cmd_inputs_3(): @@ -208,8 +160,7 @@ class Outputs(ShellOutputs): inpC: str = shell.arg(help="inpC", argstr="") # separate command into exec + args - shelly = ShellDef( - executable="executable", + shelly = Shelly( inpA="inp1", inpB="inp-1", inpC="inpNone", @@ -230,10 +181,7 @@ class Outputs(ShellOutputs): inpA: str = shell.arg(position=1, help="inpA", argstr="-v") - shelly = ShellDef( - executable="executable", - inpA="inp1", - ) + shelly = Shelly(inpA="inp1") # flag used before inp1 assert shelly.cmdline == "executable -v inp1" @@ -251,11 +199,7 @@ class Outputs(ShellOutputs): inpA: bool = shell.arg(position=1, help="inpA", argstr="-v") # separate command into exec + args - shelly = ShellDef( - executable="executable", - additional_args=["arg"], - inpA=True, - ) + shelly = Shelly(additional_args=["arg"], inpA=True) # a flag is used without any additional argument assert shelly.cmdline == "executable -v arg" @@ -270,12 +214,9 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="") + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="", sep=" ") - shelly = ShellDef( - executable="executable", - inpA=["el_1", "el_2", "el_3"], - ) + shelly = Shelly(inpA=["el_1", "el_2", "el_3"]) # multiple elements assert shelly.cmdline == "executable el_1 el_2 el_3" @@ -290,12 +231,9 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v") + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v", sep=" ") - shelly = ShellDef( - executable="executable", - inpA=["el_1", "el_2", "el_3"], - ) + shelly = Shelly(inpA=["el_1", "el_2", "el_3"]) assert shelly.cmdline == "executable -v el_1 el_2 el_3" @@ -309,12 +247,9 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v...") + inpA: ty.List[str] = shell.arg(position=2, help="inpA", argstr="-v...", sep=" ") - shelly = ShellDef( - executable="executable", - inpA=["el_1", "el_2", "el_3"], - ) + shelly = Shelly(inpA=["el_1", "el_2", 
"el_3"]) # a flag is repeated assert shelly.cmdline == "executable -v el_1 -v el_2 -v el_3" @@ -329,17 +264,14 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: MultiInputObj[str] = shell.arg( + inpA: list[str] = shell.arg( position=1, help="inpA", sep=",", argstr="", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - ) + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # separated by commas assert shelly.cmdline == "executable aaa,bbb,ccc" @@ -354,17 +286,14 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: MultiInputObj[str] = shell.arg( + inpA: list[str] = shell.arg( position=1, help="inpA", sep=",", argstr="-v", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - ) + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -379,17 +308,14 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: MultiInputObj[str] = shell.arg( + inpA: list[str] = shell.arg( position=1, help="inpA", sep=",", argstr="-v {inpA}", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - ) + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -404,17 +330,14 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: MultiInputObj[str] = shell.arg( + inpA: list[str] = shell.arg( position=1, help="inpA", sep=",", argstr="-v...", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - ) + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -429,17 +352,14 @@ class Outputs(ShellOutputs): executable = "executable" - inpA: MultiInputObj[str] = shell.arg( + inpA: list[str] = shell.arg( position=1, help="inpA", sep=",", argstr="-v {inpA}...", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa", "bbb", "ccc"], - ) + shelly = Shelly(inpA=["aaa", "bbb", "ccc"]) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -457,14 +377,10 @@ class Outputs(ShellOutputs): inpA: MultiInputObj[str] = shell.arg( position=1, help="inpA", - sep=",", argstr="-v...", ) - shelly = ShellDef( - executable="executable", - inpA=["aaa"], - ) + shelly = Shelly(inpA=["aaa"]) assert shelly.cmdline == "executable -v aaa" @@ -481,14 +397,10 @@ class Outputs(ShellOutputs): inpA: str = shell.arg( position=1, help="inpA", - sep=",", argstr="-v...", ) - shelly = ShellDef( - executable="executable", - inpA="aaa", - ) + shelly = Shelly(inpA="aaa") assert shelly.cmdline == "executable -v aaa" @@ -508,10 +420,7 @@ class Outputs(ShellOutputs): argstr="-v {inpA}", ) - shelly = ShellDef( - executable="executable", - inpA="aaa", - ) + shelly = Shelly(inpA="aaa") assert shelly.cmdline == "executable -v aaa" @@ -531,10 +440,7 @@ class Outputs(ShellOutputs): argstr="-v {inpA}...", ) - shelly = ShellDef( - executable="executable", - inpA=["el_1", "el_2"], - ) + shelly = Shelly(inpA=["el_1", "el_2"]) assert shelly.cmdline == "executable -v el_1 -v el_2" @@ -554,10 +460,7 @@ class Outputs(ShellOutputs): argstr="-v {inpA:.5f}", ) - shelly = ShellDef( - executable="executable", - inpA=0.007, - ) + shelly = Shelly(inpA=0.007) assert shelly.cmdline == "executable -v 0.00700" @@ -575,15 +478,12 @@ class Outputs(ShellOutputs): position=1, help="inpA", argstr="", - mandatory=True, ) - shelly = ShellDef( - executable="executable", - ) + shelly = Shelly() with 
pytest.raises(Exception) as e: shelly.cmdline - assert "mandatory" in str(e.value) + assert "mandatory" in str(e.value).lower() def test_shell_cmd_inputs_not_given_1(): @@ -594,132 +494,84 @@ class Outputs(ShellOutputs): executable = "executable" - arg1: MultiInputObj = ( - shell.arg( - argstr="--arg1", - help="Command line argument 1", - ), + arg1: MultiInputObj = shell.arg( + argstr="--arg1", + default=attrs.Factory(list), + help="Command line argument 1", ) - arg2: MultiInputObj = ( - shell.arg( - argstr="--arg2", - help="Command line argument 2", - ), + arg2: MultiInputObj = shell.arg( + argstr="--arg2", + help="Command line argument 2", ) - arg3: File = ( - shell.arg( - argstr="--arg3", - help="Command line argument 3", - ), + arg3: File | None = shell.arg( + argstr="--arg3", + default=None, + help="Command line argument 3", ) - shelly = ShellDef( - name="shelly", - executable="executable", - ) + shelly = Shelly() - shelly.definition.arg2 = "argument2" + shelly.arg2 = "argument2" assert shelly.cmdline == "executable --arg2 argument2" def test_shell_cmd_inputs_template_1(): - """additional inputs, one uses output_file_template (and argstr)""" + """additional inputs, one uses path_template (and argstr)""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template="{inpA}_out", - ), - ) - - shelly = ShellDef(executable="executable", inpA="inpA") - # outA has argstr in the metadata fields, so it's a part of the command line - # the full path will be use din the command line - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" - # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] - - -def test_shell_cmd_inputs_template_1a(): - """additional inputs, one uses output_file_template (without argstr)""" - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - class Outputs(ShellOutputs): - pass + path_template="{inpA}_out", + ) executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( - help="outA", - output_file_template="{inpA}_out", - ), + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) - shelly = ShellDef(executable="executable", inpA="inpA") - # outA has no argstr in metadata, so it's not a part of the command line - assert shelly.cmdline == "executable inpA" + shelly = Shelly(inpA="inpA") + # outA has argstr in the metadata fields, so it's a part of the command line + # the full path will be use din the command line + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" + # checking if outA in the output fields + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] # TODO: after deciding how we use requires/templates def test_shell_cmd_inputs_template_2(): - """additional inputs, one uses output_file_template (and argstr, but input not provided)""" + """additional inputs, one uses path_template (and argstr, but input not provided)""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpB: str = shell.arg(position=1, help="inpB", argstr="") - outB: str = ( - shell.arg( + outB: File | 
None = shell.outarg( position=2, help="outB", argstr="-o", - output_file_template="{inpB}_out", - ), - ) + path_template="{inpB}_out", + ) - shelly = ShellDef( - executable="executable", - ) + executable = "executable" + + inpB: File | None = shell.arg(position=1, help="inpB", argstr="", default=None) + + shelly = Shelly() # inpB not in the inputs, so no outB in the command line assert shelly.cmdline == "executable" # checking if outB in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outB"] + assert get_output_names(shelly) == ["outB", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_3(tmp_path): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line """ inpA = tmp_path / "inpA" @@ -730,59 +582,53 @@ def test_shell_cmd_inputs_template_3(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + + outA: File = shell.outarg( + help="outA", + path_template="{inpA}_out", + ) + outB: File = shell.outarg( + help="outB", + path_template="{inpB}_out", + ) executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpB: str = ( - shell.arg( - position=2, - help="inpB", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( - help="outA", - output_file_template="{inpA}_out", - ), + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) - outB: str = ( - shell.arg( - help="outB", - output_file_template="{inpB}_out", - ), + inpB: str = shell.arg( + position=2, + help="inpB", + argstr="", ) - outAB: str = ( - shell.arg( - position=-1, - help="outAB", - argstr="-o {outA} {outB}", - readonly=True, - ), + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ) - shelly = ShellDef(executable="executable", inpA=inpA, inpB=inpB) + shelly = Shelly(inpA=inpA, inpB=inpB) # using syntax from the outAB field assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA'} {tmp_path / 'inpB'} -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" + == f"executable {tmp_path / 'inpA'} {tmp_path / 'inpB'} -o {Path.cwd() / 'inpA_out'} {str(Path.cwd() / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert get_output_names(shelly) == [ + "return_code", + "stdout", + "stderr", + "outA", + "outB", + ] def test_shell_cmd_inputs_template_3a(): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line testing a different order within the input definition """ @@ -790,105 +636,95 @@ def test_shell_cmd_inputs_template_3a(): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + + outA: File = shell.outarg( + help="outA", + path_template="{inpA}_out", + ) + outB: File = shell.outarg( + help="outB", + path_template="{inpB}_out", + ) executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpB: str = ( - shell.arg( - position=2, - help="inpB", - argstr="", - mandatory=True, - ), - ) - outAB: str = ( - shell.arg( - position=-1, - help="outAB", - argstr="-o {outA} 
{outB}", - readonly=True, - ), + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) - outA: str = ( - shell.arg( - help="outA", - output_file_template="{inpA}_out", - ), + inpB: str = shell.arg( + position=2, + help="inpB", + argstr="", ) - outB: str = ( - shell.arg( - help="outB", - output_file_template="{inpB}_out", - ), + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ) - shelly = ShellDef(executable="executable", inpA="inpA", inpB="inpB") + shelly = Shelly(inpA="inpA", inpB="inpB") # using syntax from the outAB field assert ( shelly.cmdline - == f"executable inpA inpB -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" + == f"executable inpA inpB -o {Path.cwd() / 'inpA_out'} {str(Path.cwd() / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert get_output_names(shelly) == [ + "return_code", + "stdout", + "stderr", + "outA", + "outB", + ] # TODO: after deciding how we use requires/templates def test_shell_cmd_inputs_template_4(): - """additional inputs with output_file_template and an additional + """additional inputs with path_template and an additional read-only fields that combine two outputs together in the command line - one output_file_template can't be resolved - no inpB is provided + one path_template can't be resolved - no inpB is provided """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + outA: File = shell.outarg( + help="outA", + path_template="{inpA}_out", + ) + outB: str = shell.arg( + help="outB", + path_template="{inpB}_out", + ) executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) inpB: str = shell.arg(position=2, help="inpB", argstr="") - outAB: str = ( - shell.arg( - position=-1, - help="outAB", - argstr="-o {outA} {outB}", - readonly=True, - ), - ) - outA: str = ( - shell.arg( - help="outA", - output_file_template="{inpA}_out", - ), - ) - outB: str = ( - shell.arg( - help="outB", - output_file_template="{inpB}_out", - ), + outAB: str = shell.arg( + position=-1, + help="outAB", + argstr="-o {outA} {outB}", + readonly=True, ) - shelly = ShellDef(executable="executable", inpA="inpA") + shelly = Shelly(inpA="inpA") # inpB is not provided so outB not in the command line - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" + assert get_output_names(shelly) == [ + "return_code", + "stdout", + "stderr", + "outA", + "outB", + ] def test_shell_cmd_inputs_template_5_ex(): @@ -908,14 +744,14 @@ class Outputs(ShellOutputs): readonly=True, ) - shelly = ShellDef(executable="executable", outAB="outAB") + shelly = Shelly(outAB="outAB") with pytest.raises(Exception) as e: shelly.cmdline assert "read only" in str(e.value) def test_shell_cmd_inputs_template_6(): - """additional inputs with output_file_template that has type ty.Union[str, bool] + """additional inputs with path_template that has type ty.Union[str, bool] no default is set, so if nothing is provided as an input, the output is used whenever the template can be formatted (the same way as for templates that has type=str) @@ -924,47 +760,42 @@ def 
test_shell_cmd_inputs_template_6(): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: ty.Union[str, bool] = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template="{inpA}_out", - ), + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) # no input for outA (and no default value), so the output is created whenever the # template can be formatted (the same way as for templates that has type=str) - shelly = ShellDef(executable="executable", inpA="inpA") - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + inpA = File.mock("inpA") + shelly = Shelly(inpA=inpA) + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" # a string is provided for outA, so this should be used as the outA value - shelly = ShellDef(executable="executable", inpA="inpA", outA="outA") + shelly = Shelly(inpA=inpA, outA="outA") assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellDef(executable="executable", inpA="inpA", outA=True) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + shelly = Shelly(inpA=inpA, outA=True) + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = ShellDef(executable="executable", inpA="inpA", outA=False) + shelly = Shelly(inpA=inpA, outA=False) assert shelly.cmdline == "executable inpA" def test_shell_cmd_inputs_template_6a(): - """additional inputs with output_file_template that has type ty.Union[str, bool] + """additional inputs with path_template that has type ty.Union[str, bool] and default is set to False, so if nothing is provided as an input, the output is not used """ @@ -972,340 +803,283 @@ def test_shell_cmd_inputs_template_6a(): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: ty.Union[str, bool] = ( - shell.arg( - default=False, + outA: File | None = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template="{inpA}_out", - ), + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) # no input for outA, but default is False, so the outA shouldn't be used - shelly = ShellDef(executable="executable", inpA="inpA") + shelly = Shelly(inpA="inpA") assert shelly.cmdline == "executable inpA" # a string is provided for outA, so this should be used as the outA value - shelly = ShellDef(executable="executable", inpA="inpA", outA="outA") + shelly = Shelly(inpA="inpA", outA="outA") assert shelly.cmdline == "executable inpA -o outA" # True is provided for outA, so the formatted template should be used as outA value - shelly = ShellDef(executable="executable", inpA="inpA", outA=True) - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + shelly = Shelly(inpA="inpA", outA=True) + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used - shelly = 
ShellDef(executable="executable", inpA="inpA", outA=False) + shelly = Shelly(inpA="inpA", outA=False) assert shelly.cmdline == "executable inpA" def test_shell_cmd_inputs_template_7(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) no keep_extension is used """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="", - output_file_template="{inpA}_out", - ), + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out.txt'}" ) def test_shell_cmd_inputs_template_7a(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) keep_extension is True (as default) """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="", keep_extension=True, - output_file_template="{inpA}_out", - ), + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out.txt'}" ) def test_shell_cmd_inputs_template_7b(tmp_path: Path): - """additional inputs uses output_file_template with a suffix (no extension) + """additional inputs uses path_template with a suffix (no extension) keep extension is False (so the extension is removed when creating the output) """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="", keep_extension=False, - output_file_template="{inpA}_out", - ), + path_template="{inpA}_out", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) inpA_file = tmp_path / "a_file.txt" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmp_path / 
'a_file.txt'} {shelly.output_dir / 'a_file_out'}" + == f"executable {tmp_path / 'a_file.txt'} {Path.cwd() / 'a_file_out'}" ) def test_shell_cmd_inputs_template_8(tmp_path: Path): - """additional inputs uses output_file_template with a suffix and an extension""" + """additional inputs uses path_template with a suffix and an extension""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="", - output_file_template="{inpA}_out.txt", - ), + path_template="{inpA}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) inpA_file = tmp_path / "a_file.t" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file) + shelly = Shelly(inpA=inpA_file) # outA should be formatted in a way that inpA extension is removed and the template extension is used assert ( shelly.cmdline - == f"executable {tmp_path / 'a_file.t'} {shelly.output_dir / 'a_file_out.txt'}" + == f"executable {tmp_path / 'a_file.t'} {Path.cwd() / 'a_file_out.txt'}" ) def test_shell_cmd_inputs_template_9(tmp_path: Path): - """additional inputs, one uses output_file_template with two fields: + """additional inputs, one uses path_template with two fields: one File and one ints - the output should be recreated from the template """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpInt}_out.txt", + ) executable = "executable" - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpInt: int = ( - shell.arg( - position=2, - help="inp int", - argstr="-i", - mandatory=True, - ), + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", ) - outA: str = ( - shell.arg( - position=3, - help="outA", - argstr="-o", - output_file_template="{inpA}_{inpInt}_out.txt", - ), + inpInt: int = shell.arg( + position=2, + help="inp int", + argstr="-i", ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file, inpInt=3) + shelly = Shelly(inpA=inpA_file, inpInt=3) assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA.t'} -i 3 -o {shelly.output_dir / 'inpA_3_out.txt'}" + == f"executable {tmp_path / 'inpA.t'} -i 3 -o {Path.cwd() / 'inpA_3_out.txt'}" ) # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_9a(tmp_path: Path): - """additional inputs, one uses output_file_template with two fields: + """additional inputs, one uses path_template with two fields: one file and one string without extension - should be fine """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + outA: File = shell.outarg( + position=3, + help="outA", + argstr="-o", + path_template="{inpA}_{inpStr}_out.txt", + ) executable = "executable" - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpStr: str = ( - shell.arg( - position=2, - help="inp str", - argstr="-i", - mandatory=True, - ), + inpA: File = shell.arg( + 
position=1, + help="inpA", + argstr="", ) - outA: str = ( - shell.arg( - position=3, - help="outA", - argstr="-o", - output_file_template="{inpA}_{inpStr}_out.txt", - ), + inpStr: str = shell.arg( + position=2, + help="inp str", + argstr="-i", ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellDef(executable="executable", inpA=inpA_file, inpStr="hola") + shelly = Shelly(inpA=inpA_file, inpStr="hola") assert ( shelly.cmdline - == f"executable {tmp_path / 'inpA.t'} -i hola -o {shelly.output_dir / 'inpA_hola_out.txt'}" + == f"executable {tmp_path / 'inpA.t'} -i hola -o {Path.cwd() / 'inpA_hola_out.txt'}" ) # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): - """output_file_template with two fields that are both Files, + """path_template with two fields that are both Files, an exception should be raised """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - executable = "executable" - - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpFile: File = ( - shell.arg( - position=2, - help="inp file", - argstr="-i", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=3, help="outA", argstr="-o", - output_file_template="{inpA}_{inpFile}_out.txt", - ), + path_template="{inpA}_{inpFile}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpFile: File = shell.arg( + position=2, + help="inp file", + argstr="-i", ) inpA_file = tmp_path / "inpA.t" @@ -1314,8 +1088,7 @@ class Outputs(ShellOutputs): inpFile_file = tmp_path / "inpFile.t" inpFile_file.write_text("content") - shelly = ShellDef( - executable="executable", + shelly = Shelly( inpA=inpA_file, inpFile=inpFile_file, ) @@ -1325,47 +1098,38 @@ class Outputs(ShellOutputs): def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): - """output_file_template with two fields: a file and a string with extension, + """path_template with two fields: a file and a string with extension, that should be used as an additional file and the exception should be raised """ @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - inpA: File = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpStr: str = ( - shell.arg( - position=2, - help="inp str with extension", - argstr="-i", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=3, help="outA", argstr="-o", - output_file_template="{inpA}_{inpStr}_out.txt", - ), + path_template="{inpA}_{inpStr}_out.txt", + ) + + executable = "executable" + + inpA: File = shell.arg( + position=1, + help="inpA", + argstr="", + ) + inpStr: str = shell.arg( + position=2, + help="inp str with extension", + argstr="-i", ) inpA_file = tmp_path / "inpA.t" inpA_file.write_text("content") - shelly = ShellDef( - executable="executable", + shelly = Shelly( inpA=inpA_file, inpStr="hola.txt", ) @@ -1376,38 +1140,33 @@ class Outputs(ShellOutputs): def test_shell_cmd_inputs_template_10(): - """output_file_template uses a float field with formatting""" + """path_template uses a float field with formatting""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class 
Outputs(ShellOutputs): - pass - - executable = "executable" - inpA: float = ( - shell.arg( - position=1, - help="inpA", - argstr="{inpA:.1f}", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template="file_{inpA:.1f}_out", - ), + path_template="file_{inpA:.1f}_out", + ) + + executable = "executable" + + inpA: float = shell.arg( + position=1, + help="inpA", + argstr="{inpA:.1f}", ) - shelly = ShellDef(executable="executable", inpA=3.3456) + shelly = Shelly(inpA=3.3456) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line - assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" + assert shelly.cmdline == f"executable 3.3 -o {Path.cwd() / 'file_3.3_out'}" # checking if outA in the output fields - assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] + assert get_output_names(shelly) == ["outA", "return_code", "stderr", "stdout"] def test_shell_cmd_inputs_template_requires_1(): @@ -1417,38 +1176,33 @@ def test_shell_cmd_inputs_template_requires_1(): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - executable = "executable" - - in_file: str = ( - shell.arg( - help="input file", - mandatory=True, - argstr="", - ), - ) - with_tpl: bool = shell.arg(help="enable template") - out_file: str = ( - shell.arg( + out_file: File | None = shell.outarg( help="output file", argstr="--tpl", - output_file_template="tpl.{in_file}", + path_template="tpl.{in_file}", requires={"with_tpl"}, - ), + ) + + executable = "executable" + + in_file: str = shell.arg( + help="input file", + argstr="", ) + with_tpl: bool = shell.arg(help="enable template", default=False) # When requirements are not met. - shelly = ShellDef(executable="cmd", in_file="in.file") + shelly = Shelly(executable="cmd", in_file="in.file") assert "--tpl" not in shelly.cmdline # When requirements are met. 
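    # Note: per the reworked syntax the definition class is instantiated directly, so the
    # field appears to be set on the task object itself (shelly.with_tpl = True) rather than
    # through shelly.definition. Once the bool field is set, the requires={"with_tpl"}
    # condition on out_file is satisfied and the "--tpl" path template is rendered into
    # shelly.cmdline, as the assertion below checks.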
- shelly.definition.with_tpl = True + shelly.with_tpl = True assert "tpl.in.file" in shelly.cmdline def test_shell_cmd_inputs_template_function_1(): - """one input field uses output_file_template that is a simple function + """one input field uses path_template that is a simple function this can be easily done by simple template as in test_shell_cmd_inputs_template_1 """ @@ -1459,34 +1213,29 @@ def template_fun(inputs): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template=template_fun, - ), + path_template=template_fun, + ) + + executable = "executable" + + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) - shelly = ShellDef(executable="executable", inpA="inpA") + shelly = Shelly(inpA="inpA") - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" def test_shell_cmd_inputs_template_function_2(): - """one input field uses output_file_template that is a function, + """one input field uses path_template that is a function, depending on a value of an input it returns different template """ @@ -1500,82 +1249,31 @@ def template_fun(inputs): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass - - executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), - ) - inpB: int = ( - shell.arg( - help="inpB", - mandatory=True, - ), - ) - outA: str = ( - shell.arg( + outA: File = shell.outarg( position=2, help="outA", argstr="-o", - output_file_template=template_fun, - ), - ) - - shelly = ShellDef( - executable="executable", - inpA="inpA", - inpB=1, - ) - - assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_odd'}" - - -def test_shell_cmd_inputs_template_1_st(): - """additional inputs, one uses output_file_template (and argstr) - testing cmdline when splitter defined - """ - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - class Outputs(ShellOutputs): - pass + path_template=template_fun, + ) executable = "executable" - inpA: str = ( - shell.arg( - position=1, - help="inpA", - argstr="", - mandatory=True, - ), + inpA: str = shell.arg( + position=1, + help="inpA", + argstr="", ) - outA: str = ( - shell.arg( - position=2, - help="outA", - argstr="-o", - output_file_template="{inpA}_out", - ), + inpB: int = shell.arg( + help="inpB", ) - inpA = ["inpA_1", "inpA_2"] - ShellDef( - name="f", - executable="executable", - ).split("inpA", inpA=inpA) + shelly = Shelly( + inpA="inpA", + inpB=1, + ) - # cmdline_list = shelly.cmdline - # assert len(cmdline_list) == 2 - # for i in range(2): - # path_out = Path(shelly.output_dir[i]) / f"{inpA[i]}_out" - # assert cmdline_list[i] == f"executable {inpA[i]} -o {path_out}" + assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_odd'}" # TODO: after deciding how we use requires/templates @@ -1587,37 +1285,56 @@ def test_shell_cmd_inputs_denoise_image( @shell.define class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - pass + + correctedImage: File | None = shell.outarg( + help=""" + The output consists of the noise corrected version of the input image. + Optionally, one can also output the estimated noise image. 
""", + path_template="{inputImageFilename}_out", + ) + noiseImage: File | None = shell.outarg( + help=""" + The output consists of the noise corrected version of the input image. + Optionally, one can also output the estimated noise image. """, + path_template="{inputImageFilename}_noise", + ) executable = "executable" - image_dimensionality: int = shell.arg( + image_dimensionality: int | None = shell.arg( help=""" 2/3/4 This option forces the image to be treated as a specified-dimensional image. If not specified, the program tries to infer the dimensionality from the input image. """, - allowed_values=[2, 3, 4], + allowed_values=[2, 3, 4, None], + default=None, argstr="-d", ) inputImageFilename: File = shell.arg( help="A scalar image is expected as input for noise correction.", argstr="-i", - mandatory=True, ) - noise_model: str = shell.arg( + noise_model: str | None = shell.arg( + default=None, help=""" Rician/(Gaussian) Employ a Rician or Gaussian noise model. """, allowed_values=["Rician", "Gaussian"], argstr="-n", ) - maskImageFilename: str = shell.arg( + maskImageFilename: str | None = shell.arg( + default=None, help="If a mask image is specified, denoising is only performed in the mask region.", argstr="-x", ) shrink_factor: int = shell.arg( default=1, - help=""" (1)/2/3/... Running noise correction on large images can be time consuming. To lessen computation time, the input image can be resampled. The shrink factor, specified as a single integer, describes this resampling. Shrink factor = 1 is the default. """, + help=""" + (1)/2/3/... + Running noise correction on large images can be time consuming. + To lessen computation time, the input image can be resampled. + The shrink factor, specified as a single integer, describes this + resampling. Shrink factor = 1 is the default. """, argstr="-s", ) patch_radius: int = shell.arg( @@ -1630,16 +1347,8 @@ class Outputs(ShellOutputs): help="Search radius. Default = 2x2x2.", argstr="-r", ) - correctedImage: str = shell.arg( - help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, - output_file_template="{inputImageFilename}_out", - ) - noiseImage: ty.Union[str, bool] = shell.arg( - default=False, - help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. 
""", - output_file_template="{inputImageFilename}_noise", - ) - output: str = shell.arg( + output: str | None = shell.arg( + default=None, help="Combined output", argstr="-o [{correctedImage}, {noiseImage}]", position=-1, @@ -1656,7 +1365,8 @@ class Outputs(ShellOutputs): help="Print the help menu (short version)", argstr="-h", ) - help: int = shell.arg( + help: int | None = shell.arg( + default=None, help="Print the help menu.", argstr="--help", ) @@ -1665,46 +1375,46 @@ class Outputs(ShellOutputs): my_input_file.write_text("content") # no input provided - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", ) with pytest.raises(Exception) as e: shelly.cmdline - assert "mandatory" in str(e.value) + assert "mandatory" in str(e.value).lower() # input file name, noiseImage is not set, so using default value False - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", inputImageFilename=my_input_file, ) assert ( shelly.cmdline - == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # input file name, noiseImage is set to True, so template is used in the output - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", inputImageFilename=my_input_file, noiseImage=True, ) assert ( shelly.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " - f"-o [{shelly.output_dir / 'a_file_out.ext'}, {str(shelly.output_dir / 'a_file_noise.ext')}]" + f"-o [{Path.cwd() / 'a_file_out.ext'}, {str(Path.cwd() / 'a_file_noise.ext')}]" ) # input file name and help_short - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, ) assert ( shelly.cmdline - == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{shelly.output_dir / 'a_file_out.ext'}]" + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{Path.cwd() / 'a_file_out.ext'}]" ) - assert shelly.output_names == [ + assert get_output_names(shelly) == [ "return_code", "stdout", "stderr", @@ -1713,19 +1423,19 @@ class Outputs(ShellOutputs): ] # adding image_dimensionality that has allowed_values [2, 3, 4] - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", inputImageFilename=my_input_file, image_dimensionality=2, ) assert ( shelly.cmdline - == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" + == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised with pytest.raises(ValueError) as excinfo: - shelly = ShellDef( + shelly = Shelly( executable="DenoiseImage", inputImageFilename=my_input_file, image_dimensionality=5, @@ -1737,7 +1447,7 @@ class Outputs(ShellOutputs): @shell.define -class SimpleTaskXor(ShellDef["SimpleTaskXor.Outputs"]): +class SimpleXor(ShellDef["SimpleTaskXor.Outputs"]): input_1: str = shell.arg( help="help", @@ -1762,28 +1472,28 @@ class Outputs(ShellOutputs): def test_task_inputs_mandatory_with_xOR_one_mandatory_is_OK(): """input definition with mandatory inputs""" - task = SimpleTaskXor() - task.definition.input_1 = "Input1" - task.definition.input_2 = attr.NOTHING - task.definition.check_fields_input_spec() + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor.input_2 = attrs.NOTHING + 
simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): """input definition with mandatory inputs""" - task = SimpleTaskXor() - task.definition.input_1 = attr.NOTHING - task.definition.input_2 = attr.NOTHING - task.definition.input_3 = True - task.definition.check_fields_input_spec() + simple_xor = SimpleXor() + simple_xor.input_1 = attrs.NOTHING + simple_xor.input_2 = attrs.NOTHING + simple_xor.input_3 = True + simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): """input definition with mandatory inputs""" - task = SimpleTaskXor() - task.definition.input_1 = attr.NOTHING - task.definition.input_2 = attr.NOTHING + simple_xor = SimpleXor() + simple_xor.input_1 = attrs.NOTHING + simple_xor.input_2 = attrs.NOTHING with pytest.raises(Exception) as excinfo: - task.definition.check_fields_input_spec() + simple_xor._check_rules() assert "input_1 is mandatory" in str(excinfo.value) assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) assert excinfo.type is AttributeError @@ -1791,25 +1501,25 @@ def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): """input definition with mandatory inputs""" - task = SimpleTaskXor() - task.definition.input_1 = "Input1" - task.definition.input_2 = True + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor.input_2 = True with pytest.raises(Exception) as excinfo: - task.definition.check_fields_input_spec() + simple_xor._check_rules() assert "input_1 is mutually exclusive with ['input_2']" in str(excinfo.value) assert excinfo.type is AttributeError def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): """input definition with mandatory inputs""" - task = SimpleTaskXor() - task.definition.input_1 = "Input1" - task.definition.input_2 = True - task.definition.input_3 = False + simple_xor = SimpleXor() + simple_xor.input_1 = "Input1" + simple_xor.input_2 = True + simple_xor.input_3 = False with pytest.raises(Exception) as excinfo: - task.definition.check_fields_input_spec() + simple_xor._check_rules() assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( excinfo.value ) diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py index 12cfa74c78..b178f2df24 100644 --- a/pydra/utils/tests/utils.py +++ b/pydra/utils/tests/utils.py @@ -63,7 +63,6 @@ class SpecificShellTask(specs.ShellDef["SpecificShellTask.Outputs"]): help="the input file", argstr="", copy_mode="copy", - sep=" ", ) class Outputs(specs.ShellOutputs): @@ -86,7 +85,6 @@ class OtherSpecificShellTask(ShellDef): help="the input file", argstr="", copy_mode="copy", - sep=" ", ) class Outputs(specs.ShellOutputs): diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index 6c538efaa8..d7aa09309d 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1058,6 +1058,12 @@ def optional_type(type_: type) -> type: return type_ +def is_multi_input(type_: type) -> bool: + """Check if the type is a MultiInputObj""" + type_ = optional_type(type_) + return MultiInputObj in (type_, ty.get_origin(type_)) + + def is_fileset_or_union(type_: type, allow_none: bool | None = None) -> bool: """Check if the type is a FileSet or a Union containing a FileSet From d88acfe2d2f80a2ee8ba74510f99f0e701316876 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 10:31:56 +1100 Subject: [PATCH 294/342] debugging requires for bool fields --- pydra/engine/specs.py 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 24f559632e..f352e134e4 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -507,7 +507,7 @@ def _check_rules(self): # Raise error if any required field is unset. if ( - value is not None + value and field.requires and not any(rs.satisfied(self) for rs in field.requires) ): From d19998117d37292e12e78ecaf1c6c549c0f379b5 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 10:32:32 +1100 Subject: [PATCH 295/342] reworked test_shelltask to new syntax --- pydra/engine/tests/test_shelltask.py | 5170 ++++++++++---------------- 1 file changed, 1881 insertions(+), 3289 deletions(-) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 17dec45a0f..5579298120 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -6,9 +6,8 @@ from pathlib import Path import re import stat - -from ..task import ShellDef from ..submitter import Submitter +from pydra.design import shell, workflow from ..specs import ( ShellOutputs, ShellDef, @@ -32,13 +31,13 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] - shelly = ShellDef(name="shelly", executable=cmd, cache_dir=tmp_path) + shelly = shell.define(cmd)() assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin=plugin_dask_opt) - assert Path(res.output.stdout.rstrip()) == shelly.output_dir - assert res.output.return_code == 0 - assert res.output.stderr == "" + res = results_function(shelly, plugin=plugin_dask_opt, cache_dir=tmp_path) + assert Path(res.outputs.stdout.rstrip()).parent == tmp_path + assert res.outputs.return_code == 0 + assert res.outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -47,28 +46,28 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): strip option to remove \n at the end os stdout """ cmd = ["pwd"] - shelly = ShellDef(name="shelly", executable=cmd, strip=True) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd)() + assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin) - assert Path(res.output.stdout) == Path(shelly.output_dir) - assert res.output.return_code == 0 - assert res.output.stderr == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert Path(res.outputs.stdout) == Path(shelly.output_dir) + assert res.outputs.return_code == 0 + assert res.outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] - shelly = ShellDef(name="shelly", executable=cmd) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd)() + assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin) - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 - assert res.output.stderr == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout.strip() == " ".join(cmd[1:]) + assert res.outputs.return_code == 0 + assert res.outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -77,15 +76,15 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = ["hail", "pydra"] # 
separate command into exec + args - shelly = ShellDef(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmp_path - assert shelly.definition.executable == "echo" + shelly = shell.define(cmd_exec)(additional_args=cmd_args) + + assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - res = results_function(shelly, plugin) - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 - assert res.output.stderr == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout.strip() == " ".join(cmd_args) + assert res.outputs.return_code == 0 + assert res.outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -94,15 +93,15 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_args = "pydra" # separate command into exec + args - shelly = ShellDef(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmp_path - assert shelly.definition.executable == "echo" + shelly = shell.define(cmd_exec)(args=cmd_args) + + assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - res = results_function(shelly, plugin) - assert res.output.stdout == "pydra\n" - assert res.output.return_code == 0 - assert res.output.stderr == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "pydra\n" + assert res.outputs.return_code == 0 + assert res.outputs.stderr == "" # tests with State @@ -116,19 +115,18 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): cmd = ["pwd", "whoami"] # all args given as executable - shelly = ShellDef(name="shelly").split("executable", executable=cmd) - shelly.cache_dir = tmp_path + shelly = shell.define("placeholder")().split("executable", executable=cmd) # assert shelly.cmdline == ["pwd", "whoami"] res = shelly(plugin=plugin_dask_opt) - assert Path(res[0].output.stdout.rstrip()) == shelly.output_dir[0] + assert Path(res.outputs.stdout[0].rstrip()) == shelly.output_dir[0] if "USER" in os.environ: - assert res[1].output.stdout == f"{os.environ['USER']}\n" + assert res.outputs.stdout[1] == f"{os.environ['USER']}\n" else: - assert res[1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert res.outputs.stdout[1] + assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 + assert res.outputs.stderr[0] == res.outputs.stderr[1] == "" def test_shell_cmd_4(plugin, tmp_path): @@ -138,21 +136,18 @@ def test_shell_cmd_4(plugin, tmp_path): cmd_exec = "echo" cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellDef(name="shelly", executable=cmd_exec).split( - splitter="args", args=cmd_args - ) - shelly.cache_dir = tmp_path + shelly = shell.define(cmd_exec)().split(splitter="args", args=cmd_args) assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] # assert shelly.cmdline == ["echo nipype", "echo pydra"] res = shelly(plugin=plugin) - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" + assert res.outputs.stdout[0] == "nipype\n" + assert res.outputs.stdout[1] == "pydra\n" - assert res[0].output.return_code == res[1].output.return_code == 0 - assert res[0].output.stderr == res[1].output.stderr == "" + assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 + assert res.outputs.stderr[0] == 
res.outputs.stderr[1] == "" def test_shell_cmd_5(plugin, tmp_path): @@ -163,19 +158,16 @@ def test_shell_cmd_5(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellDef(name="shelly", executable=cmd_exec) - .split(splitter="args", args=cmd_args) - .combine("args") + shell.define(cmd_exec)().split(splitter="args", args=cmd_args).combine("args") ) - shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] # assert shelly.cmdline == ["echo nipype", "echo pydra"] res = shelly(plugin=plugin) - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" + assert res.outputs.stdout[0] == "nipype\n" + assert res.outputs.stdout[1] == "pydra\n" def test_shell_cmd_6(plugin, tmp_path): @@ -185,10 +177,9 @@ def test_shell_cmd_6(plugin, tmp_path): cmd_exec = ["echo", ["echo", "-n"]] cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellDef(name="shelly").split( - splitter=["executable", "args"], executable=cmd_exec, args=cmd_args + shelly = shell.define("placeholder").split( + executable=cmd_exec, additional_args=cmd_args ) - shelly.cache_dir = tmp_path assert shelly.inputs.executable == ["echo", ["echo", "-n"]] assert shelly.inputs.args == ["nipype", "pydra"] @@ -200,23 +191,23 @@ def test_shell_cmd_6(plugin, tmp_path): # ] res = shelly(plugin=plugin) - assert res[0].output.stdout == "nipype\n" - assert res[1].output.stdout == "pydra\n" - assert res[2].output.stdout == "nipype" - assert res[3].output.stdout == "pydra" + assert res.outputs.stdout[0] == "nipype\n" + assert res.outputs.stdout[1] == "pydra\n" + assert res.outputs.stdout[2] == "nipype" + assert res.outputs.stdout[3] == "pydra" assert ( - res[0].output.return_code - == res[1].output.return_code - == res[2].output.return_code - == res[3].output.return_code + res.outputs.return_code[0] + == res.outputs.return_code[1] + == res.outputs.return_code[2] + == res.outputs.return_code[3] == 0 ) assert ( - res[0].output.stderr - == res[1].output.stderr - == res[2].output.stderr - == res[3].output.stderr + res.outputs.stderr[0] + == res.outputs.stderr[1] + == res.outputs.stderr[2] + == res.outputs.stderr[3] == "" ) @@ -229,11 +220,10 @@ def test_shell_cmd_7(plugin, tmp_path): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellDef(name="shelly") - .split(splitter=["executable", "args"], executable=cmd_exec, args=cmd_args) + shell.define("placeholder") + .split(executable=cmd_exec, additional_args=cmd_args) .combine("args") ) - shelly.cache_dir = tmp_path assert shelly.inputs.executable == ["echo", ["echo", "-n"]] assert shelly.inputs.args == ["nipype", "pydra"] @@ -252,25 +242,20 @@ def test_shell_cmd_7(plugin, tmp_path): def test_wf_shell_cmd_1(plugin, tmp_path): """a workflow with two connected commands""" - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = "pwd" - wf.inputs.cmd2 = "ls" - wf.add(ShellDef(name="shelly_pwd", executable=wf.lzin.cmd1, strip=True)) - wf.add( - ShellDef( - name="shelly_ls", executable=wf.lzin.cmd2, args=wf.shelly_pwd.lzout.stdout - ) - ) - wf.set_output([("out", wf.shelly_ls.lzout.stdout)]) - wf.cache_dir = tmp_path + @workflow.define + def Workflow(cmd1, cmd2): + shelly_pwd = workflow.add(shell.define(cmd1)) + shelly_ls = workflow.add(shell.define(cmd2, additional_args=shelly_pwd.stdout)) + return shelly_ls.stdout - with Submitter(worker=plugin) as sub: - wf(submitter=sub) + wf = Workflow(cmd1="pwd", 
cmd2="ls") - res = wf.result() - assert "_result.pklz" in res.output.out - assert "_task.pklz" in res.output.out + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) + + assert "_result.pklz" in res.outputs.out + assert "_task.pklz" in res.outputs.out # customised input definition @@ -285,35 +270,27 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_opt = True cmd_args = "hello from pydra" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_n", - attr.ib( - type=bool, - metadata={"position": 1, "argstr": "-n", "help": "option"}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_n: bool = shell.arg( + position=1, + argstr="-n", + help="option", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - args=cmd_args, + shelly = Shelly( + additional_args=cmd_args, opt_n=cmd_opt, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.definition.executable == cmd_exec - assert shelly.definition.args == cmd_args + assert shelly.executable == cmd_exec + assert shelly.args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - res = results_function(shelly, plugin) - assert res.output.stdout == "hello from pydra" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "hello from pydra" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -326,42 +303,32 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): cmd_opt = True cmd_opt_hello = "HELLO" cmd_args = "from pydra" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_hello", - attr.ib( - type=str, - metadata={"position": 3, "help": "todo", "argstr": ""}, - ), - ), - ( - "opt_n", - attr.ib( - type=bool, - metadata={"position": 1, "help": "todo", "argstr": "-n"}, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_hello: str = shell.arg( + position=3, + help="todo", + argstr="", + ) + opt_n: bool = shell.arg( + position=1, + help="todo", + argstr="-n", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - args=cmd_args, + shelly = Shelly( + additional_args=cmd_args, opt_n=cmd_opt, opt_hello=cmd_opt_hello, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.definition.executable == cmd_exec - assert shelly.definition.args == cmd_args + assert shelly.executable == cmd_exec + assert shelly.args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO from pydra" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "HELLO from pydra" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -369,70 +336,48 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + 
mandatory=True, + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, + shelly = Shelly( text=hello, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "HELLO\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input definition (no attr.ib) + using shorter syntax for input (no attr.ib) """ cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - str, - {"position": 1, "help": "text", "mandatory": True, "argstr": ""}, - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg(position=1, help="text", mandatory=True, argstr="") # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, + shelly = Shelly( text=hello, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "HELLO\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -440,62 +385,42 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided after init""" cmd_exec = "echo" hello = "HELLO" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + mandatory=True, + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) - shelly.definition.text = hello + shelly = Shelly(executable=cmd_exec) + shelly.text = hello - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res.output.stdout == "HELLO\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "HELLO\n" def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): """mandatory field added to fields, value is not provided, so exception is raised""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + 
help="text", + mandatory=True, + argstr="", + ) + + shelly = Shelly(executable=cmd_exec) with pytest.raises(Exception) as excinfo: shelly() @@ -506,181 +431,139 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): """mandatory=False, so tasks runs fine even without the value""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=ty.Optional[str], - default=None, - metadata={ - "position": 1, - "help": "text", - "mandatory": False, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: ty.Optional[str] = shell.arg( + default=None, + position=1, + help="text", + mandatory=False, + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - res = results_function(shelly, plugin) - assert res.output.stdout == "\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={"position": 1, "help": "text", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + default="Hello", + position=1, + help="text", + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hello\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "Hello\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided - using shorter syntax for input definition (no attr.ib) + using shorter syntax for input (no attr.ib) """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[("text", str, "Hello", {"position": 1, "help": "text", "argstr": ""})], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg(default="Hello", position=1, help="text", argstr="") # separate command into exec + args - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hello\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == 
"Hello\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hi", - metadata={"position": 1, "help": "text", "argstr": ""}, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + default="Hi", + position=1, + help="text", + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path - ) + shelly = Shelly(executable=cmd_exec) - assert shelly.definition.executable == cmd_exec + assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - res = results_function(shelly, plugin) - assert res.output.stdout == "Hi\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "Hi\n" def test_shell_cmd_inputspec_4c_exception(plugin): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={ - "position": 1, - "help": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) # separate command into exec + args with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set when the field" ): - ShellDef(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + default="Hello", + position=1, + help="text", + mandatory=True, + argstr="", + ) def test_shell_cmd_inputspec_4d_exception(plugin): """mandatory field added to fields, value provided""" cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - default="Hello", - metadata={ - "position": 1, - "help": "text", - "output_file_template": "exception", - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) # separate command into exec + args with pytest.raises( Exception, match=r"default value \('Hello'\) should not be set together" - ) as excinfo: - ShellDef(name="shelly", executable=cmd_exec, input_spec=my_input_spec) + ): + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: File = shell.outarg( + default="Hello", + position=1, + help="text", + path_template="exception", + argstr="", + ) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -688,48 +571,28 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): """checking xor in metadata: task should work fine, since only one option is True""" cmd_exec = "ls" cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 1, - "help": "opt t", - "argstr": "-t", - "xor": ["opt_S"], - }, - ), - ), - ( - "opt_S", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help": "opt S", - "argstr": "-S", - "xor": ["opt_t"], - }, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=1, + help="opt t", + argstr="-t", + xor=["opt_S"], + ) + opt_S: bool = shell.arg( + 
position=2, + help="opt S", + argstr="-S", + xor=["opt_t"], + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.definition.executable == cmd_exec + shelly = Shelly(opt_t=cmd_t) + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -737,45 +600,24 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): cmd_exec = "ls" cmd_t = True cmd_S = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 1, - "help": "opt t", - "argstr": "-t", - "xor": ["opt_S"], - }, - ), - ), - ( - "opt_S", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help": "opt S", - "argstr": "-S", - "xor": ["opt_t"], - }, - ), - ), - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - opt_S=cmd_S, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=1, + help="opt t", + argstr="-t", + xor=["opt_S"], + ) + opt_S: bool = shell.arg( + position=2, + help="opt S", + argstr="-S", + xor=["opt_t"], + ) + + shelly = Shelly(opt_t=cmd_t, opt_S=cmd_S) with pytest.raises(Exception) as excinfo: shelly() assert "is mutually exclusive" in str(excinfo.value) @@ -789,44 +631,27 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): cmd_exec = "ls" cmd_l = True cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - attr.ib( - type=bool, - metadata={"position": 1, "help": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], + ) + opt_l: bool = shell.arg( + position=1, + help="opt l", + argstr="-l", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, - opt_l=cmd_l, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) - assert shelly.definition.executable == cmd_exec + shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_6a_exception(plugin): @@ -835,35 +660,23 @@ def test_shell_cmd_inputspec_6a_exception(plugin): """ cmd_exec = "ls" cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - attr.ib( - type=bool, - metadata={"position": 1, "help": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", executable=cmd_exec, opt_t=cmd_t, input_spec=my_input_spec - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], 
+ ) + opt_l: bool = shell.arg( + position=1, + help="opt l", + argstr="-l", + ) + + shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) with pytest.raises(Exception) as excinfo: shelly() assert "requires" in str(excinfo.value) @@ -877,45 +690,31 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): cmd_exec = "ls" cmd_l = True cmd_t = True - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "opt_t", - attr.ib( - type=bool, - metadata={ - "position": 2, - "help": "opt t", - "argstr": "-t", - "requires": ["opt_l"], - }, - ), - ), - ( - "opt_l", - attr.ib( - type=bool, - metadata={"position": 1, "help": "opt l", "argstr": "-l"}, - ), - ), - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + opt_t: bool = shell.arg( + position=2, + help="opt t", + argstr="-t", + requires=["opt_l"], + ) + opt_l: bool = shell.arg( + position=1, + help="opt l", + argstr="-l", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - opt_t=cmd_t, + shelly = Shelly( + opt_t=cmd_t # opt_l=cmd_l, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - shelly.definition.opt_l = cmd_l - assert shelly.definition.executable == cmd_exec + shelly.opt_l = cmd_l + assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -927,34 +726,21 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): cmd = "touch" args = "newfile_tmp.txt" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + shelly = Shelly(executable=cmd, additional_args=args) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - out1 = res.output.out1.fspath + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + out1 = res.outputs.out1.fspath assert out1.exists() # checking if the file is created in a good place assert shelly.output_dir == out1.parent @@ -971,37 +757,27 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): cmd = "touch" args = "newfile_tmp.txt" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "output_field_name": "out1_changed", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + class Outputs(ShellOutputs): + out1: File = shell.outarg( + path_template="{args}", + output_field_name="out1_changed", + help="output file", ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, + additional_args=args, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" # checking if the file is 
created in a good place - assert shelly.output_dir == res.output.out1_changed.fspath.parent - assert res.output.out1_changed.fspath.name == "newfile_tmp.txt" + assert shelly.output_dir == res.outputs.out1_changed.fspath.parent + assert res.outputs.out1_changed.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1012,41 +788,26 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 1, "help": "new file", "argstr": ""}, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + position=1, + help="new file", + argstr="", + ) - shelly = ShellDef( - name="shelly", - executable=cmd, - newfile="newfile_tmp.txt", - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{newfile}", + help="output file", + ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + shelly = Shelly(executable=cmd, newfile="newfile_tmp.txt") + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1058,36 +819,23 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): cmd = "touch" args = "newfile_tmp.txt" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}.txt", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{args}.txt", + help="output file", ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - args=args, - input_spec=my_input_spec, - cache_dir=tmp_path, - ) + shelly = Shelly(executable=cmd, additional_args=args) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1.fspath.parent - assert res.output.out1.fspath.name == "newfile_tmp.txt" + assert shelly.output_dir == res.outputs.out1.fspath.parent + assert res.outputs.out1.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1098,53 +846,35 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "time", - attr.ib( - type=str, - metadata={ - "position": 1, - "argstr": "-t", - "help": "time of modif.", - }, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + 
position=2, + help="new file", + argstr="", + ) + time: str = shell.arg( + position=1, + argstr="-t", + help="time of modif.", + ) - shelly = ShellDef( - name="shelly", + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{newfile}", + help="output file", + ) + + shelly = Shelly( executable=cmd, newfile="newfile_tmp.txt", time="02121010", - input_spec=my_input_spec, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1155,59 +885,41 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): """ cmd = "touch" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "newfile", - attr.ib( - type=str, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "time", - attr.ib( - type=str, - metadata={ - "position": 1, - "argstr": "-t {time}", - "help": "time of modif.", - }, - ), - ), - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{newfile}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + newfile: str = shell.arg( + position=2, + help="new file", + argstr="", + ) + time: str = shell.arg( + position=1, + argstr="-t {time}", + help="time of modif.", + ) + + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{newfile}", + help="output file", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, newfile="newfile_tmp.txt", time="02121010", - input_spec=my_input_spec, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata), + providing output name using input_spec (path_template in metadata), the template has a suffix, the extension of the file will be moved to the end """ cmd = "cp" @@ -1216,51 +928,39 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): file = ddir / ("file.txt") file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + + class Outputs(ShellOutputs): + file_copy: str = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert 
res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy.txt" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.file_copy.fspath.exists() + assert res.outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert shelly.output_dir == res.outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter]) def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata), + providing output name using input_spec (path_template in metadata), the template has a suffix, the extension of the file will be moved to the end the change: input file has directory with a dot """ @@ -1269,97 +969,74 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): file.parent.mkdir() file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) - shelly = ShellDef( - name="shelly", executable=cmd, input_spec=my_input_spec, file_orig=file - ) + class Outputs(ShellOutputs): + file_copy: str = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) + + shelly = Shelly(executable=cmd, file_orig=file) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy.txt" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.file_copy.fspath.exists() + assert res.outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert shelly.output_dir == res.outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata) + providing output name using input_spec (path_template in metadata) and the keep_extension is set to False, so the extension is removed completely. 
""" cmd = "cp" file = tmp_path / "file.txt" file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "keep_extension": False, - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + keep_extension=False, + help="output file", + argstr="", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file_copy" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.file_copy.fspath.exists() + assert res.outputs.file_copy.fspath.name == "file_copy" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): """ - providing output name using input_spec (output_file_template in metadata) + providing output name using input_spec (path_template in metadata) and the keep_extension is set to False, so the extension is removed completely, no suffix in the template. """ @@ -1367,52 +1044,40 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): file = tmp_path / "file.txt" file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}", - "keep_extension": False, - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}", + keep_extension=False, + help="output file", + argstr="", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "file" - assert res.output.file_copy.fspath.parent == shelly.output_dir + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.file_copy.fspath.exists() + assert res.outputs.file_copy.fspath.name == "file" + assert res.outputs.file_copy.fspath.parent == shelly.output_dir @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): """ providing output name explicitly by manually setting value in input_spec - (instead of using default provided byoutput_file_template in metadata) + (instead of using default provided 
bypath_template in metadata) """ cmd = "cp" ddir = tmp_path / "data_inp" @@ -1420,46 +1085,34 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): file = ddir / ("file.txt") file.write_text("content\n") - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": "{file_orig}_copy", - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + + class Outputs(ShellOutputs): + file_copy: File = shell.outarg( + path_template="{file_orig}_copy", + help="output file", + argstr="", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, file_copy="my_file_copy.txt", - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.file_copy.fspath.exists() - assert res.output.file_copy.fspath.name == "my_file_copy.txt" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.file_copy.fspath.exists() + assert res.outputs.file_copy.fspath.name == "my_file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.fspath.parent + assert shelly.output_dir == res.outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1476,37 +1129,24 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): cmd_exec = "cat" files_list = [file_1, file_2] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "files", - attr.ib( - type=ty.List[File], - metadata={ - "position": 1, - "argstr": "...", - "sep": " ", - "help": "list of files", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + files: ty.List[File] = shell.arg( + position=1, + argstr="...", + sep=" ", + help="list of files", + mandatory=True, + ) - shelly = ShellDef( - name="shelly", - executable=cmd_exec, + shelly = Shelly( files=files_list, - input_spec=my_input_spec, - cache_dir=tmp_path, ) - assert shelly.definition.executable == cmd_exec - res = results_function(shelly, plugin) - assert res.output.stdout == "hello from boston" + assert shelly.executable == cmd_exec + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "hello from boston" def test_shell_cmd_inputspec_10_err(tmp_path): @@ -1525,83 +1165,55 @@ def test_shell_cmd_inputspec_10_err(tmp_path): cmd_exec = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "files", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "a file", - "mandatory": True, - }, - ), - ) - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + files: File = shell.arg( + position=1, + argstr="", + help="a file", + mandatory=True, + ) with pytest.raises(FileNotFoundError): - shelly = ShellDef( - name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec - ) + Shelly(executable=cmd_exec, files=file_2) def test_shell_cmd_inputspec_11(tmp_path): - input_fields = [ - ( - 
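The list-input tests below rely on `sep` together with a trailing-ellipsis `argstr` to repeat a flagless argument for every element. A minimal sketch of that pattern, assuming `shell`, `ShellDef`, `File` and `ty` are imported as in the rest of this module (names are illustrative):

    @shell.define
    class Cat(ShellDef["Cat.Outputs"]):
        executable = "cat"
        files: ty.List[File] = shell.arg(
            position=1,
            argstr="...",   # repeat the argument for every element of the list
            sep=" ",        # separator placed between elements on the command line
            help="list of files",
            mandatory=True,
        )

    cat = Cat(files=[file_1, file_2])
    # each element is inserted on the command line in turn, so stdout concatenates
    # the contents of both files ("hello from boston" in test_shell_cmd_inputspec_10).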
"inputFiles", - attr.ib( - type=MultiInputObj[str], - metadata={ - "argstr": "...", - "help": "The list of input image files to be segmented.", - }, - ), - ) - ] - output_fields = [ - ( - "outputFiles", - attr.ib( - type=MultiOutputFile, - metadata={ - "help": "Corrected Output Images: should specify the same number of images as inputVolume, if only one element is given, then it is used as a file pattern where %s is replaced by the imageVolumeType, and %d by the index list location.", - "output_file_template": "{inputFiles}", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + inputFiles: MultiInputObj[str] = shell.arg( + argstr="...", + help="The list of input image files to be segmented.", ) - ] - input_spec = SpecInfo(name="Input", fields=input_fields, bases=(ShellDef,)) - output_spec = SpecInfo(name="Output", fields=output_fields, bases=(ShellOutputs,)) + executable = "touch" - task = ShellDef( - name="echoMultiple", - executable="touch", - input_spec=input_spec, - output_spec=output_spec, - ) + class Outputs(ShellOutputs): + outputFiles: MultiOutputFile = shell.outarg( + help="""Corrected Output Images: should specify the same number of + images as inputVolume, if only one element is given, then it is used as + a file pattern where %s is replaced by the imageVolumeType, + and %d by the index list location.""", + path_template="{inputFiles}", + ) - wf = Workflow(name="wf", input_spec=["inputFiles"], inputFiles=["test1", "test2"]) + @workflow.define + def Workflow(inputFiles): - task.definition.inputFiles = wf.lzin.inputFiles + echoMultiple = workflow.add(Shelly(inputFiles=inputFiles)) + return echoMultiple.outputFiles - wf.add(task) - wf.set_output([("out", wf.echoMultiple.lzout.outputFiles)]) + wf = Workflow(inputFiles=[File.mock("test1"), File.mock("test2")]) # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). 
Same error occurs in test_shell_cmd_outputspec_7a # see https://github.com/nipype/pydra/issues/671 - with Submitter(worker="serial") as sub: - sub(wf) - result = wf.result() + with Submitter(worker="debug") as sub: + result = sub(wf) - for out_file in outputs.out: + for out_file in result.outputs.out: assert out_file.fspath.name == "test1" or out_file.fspath.name == "test2" @@ -1609,7 +1221,7 @@ def test_shell_cmd_inputspec_11(tmp_path): def test_shell_cmd_inputspec_12(tmp_path: Path, plugin, results_function): """ providing output name using input_spec - output_file_template is provided as a function that returns + path_template is provided as a function that returns various templates depending on the values of inputs fields """ cmd = "cp" @@ -1624,50 +1236,35 @@ def template_function(inputs): else: return "{file_orig}_odd" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file_orig", - attr.ib( - type=File, - metadata={"position": 2, "help": "new file", "argstr": ""}, - ), - ), - ( - "number", - attr.ib( - type=int, - metadata={"help": "a number", "mandatory": True}, - ), - ), - ( - "file_copy", - attr.ib( - type=str, - metadata={ - "output_file_template": template_function, - "help": "output file", - "argstr": "", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file_orig: File = shell.arg( + position=2, + help="new file", + argstr="", + ) + number: int = shell.arg( + help="a number", + mandatory=True, + ) - shelly = ShellDef( - name="shelly", + class Outputs(ShellOutputs): + file_copy: str = shell.outarg( + path_template=template_function, + help="output file", + argstr="", + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, file_orig=file, number=2, - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - fspath = res.output.file_copy.fspath + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + fspath = res.outputs.file_copy.fspath assert fspath.exists() assert fspath.name == "file_even.txt" # checking if it's created in a good place @@ -1677,34 +1274,23 @@ def template_function(inputs): def test_shell_cmd_inputspec_with_iterable(): """Test formatting of argstr with different iterable types.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "iterable_1", - ty.Iterable[int], - { - "help": "iterable input 1", - "argstr": "--in1", - }, - ), - ( - "iterable_2", - ty.Iterable[str], - { - "help": "iterable input 2", - "argstr": "--in2...", - }, - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "test" + iterable_1: ty.Iterable[int] = shell.arg( + help="iterable input 1", + argstr="--in1", + ) + iterable_2: ty.Iterable[str] = shell.arg( + help="iterable input 2", + argstr="--in2...", + ) - task = ShellDef(name="test", input_spec=input_spec, executable="test") + task = Shelly() for iterable_type in (list, tuple): - task.definition.iterable_1 = iterable_type(range(3)) - task.definition.iterable_2 = iterable_type(["bar", "foo"]) + task.iterable_1 = iterable_type(range(3)) + task.iterable_2 = iterable_type(["bar", "foo"]) assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" @@ -1720,50 +1306,31 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - 
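test_shell_cmd_inputspec_12 above shows that `path_template` may be a callable rather than a fixed string. A minimal sketch of that pattern, assuming the same imports as the rest of this module (names are illustrative):

    def template_function(inputs):
        # receives the resolved inputs and returns the template to use
        return "{file_orig}_even" if inputs.number % 2 == 0 else "{file_orig}_odd"

    @shell.define
    class Cp(ShellDef["Cp.Outputs"]):
        executable = "cp"
        file_orig: File = shell.arg(position=2, argstr="", help="input file")
        number: int = shell.arg(help="a number", mandatory=True)

        class Outputs(ShellOutputs):
            file_copy: str = shell.outarg(
                path_template=template_function,  # callable instead of a fixed string
                argstr="",
                help="output file",
            )

    # Cp(file_orig=file, number=2) therefore produces "file_even.txt", while an odd
    # value of `number` would produce "file_odd.txt".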
"position": 1, - "argstr": "", - "help": "orig file", - "mandatory": True, - "copyfile": True, - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + mandatory=True, + copyfile=True, + ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + path_template="{orig_file}", + help="output file", + ) + + shelly = Shelly(executable=cmd, orig_file=str(file)) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == shelly.output_dir - with open(res.output.out_file) as f: + assert res.outputs.out_file.fspath.parent == shelly.output_dir + with open(res.outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: @@ -1782,59 +1349,40 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "orig file", - "mandatory": True, - "copyfile": "hardlink", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + mandatory=True, + copyfile="hardlink", + ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - orig_file=str(file), - cache_dir=tmp_path, - ) + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + path_template="{orig_file}", + help="output file", + ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + shelly = Shelly(executable=cmd, orig_file=str(file)) + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying - assert res.output.out_file.fspath.parent == shelly.output_dir + assert res.outputs.out_file.fspath.parent == shelly.output_dir - assert res.output.out_file.fspath.parent.joinpath( - res.output.out_file.fspath.name + "s" + assert res.outputs.out_file.fspath.parent.joinpath( + res.outputs.out_file.fspath.name + "s" ).exists() - with open(res.output.out_file) as f: + with open(res.outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the file is uses a soft link, but it creates and an extra copy # it might depend on the OS - linked_file_copy = res.output.out_file.fspath.parent.joinpath( - res.output.out_file.fspath.name + "s" + linked_file_copy = res.outputs.out_file.fspath.parent.joinpath( + res.outputs.out_file.fspath.name + "s" ) if 
linked_file_copy.exists(): with open(linked_file_copy) as f: @@ -1861,49 +1409,33 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "orig file", - "mandatory": True, - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + mandatory=True, + ) + + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + path_template="{orig_file}", + help="output file", + ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, orig_file=str(file), - cache_dir=tmp_path, ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.out_file.fspath.exists() # the file is not copied, it is changed in place - assert res.output.out_file == file - with open(res.output.out_file) as f: + assert res.outputs.out_file == file + with open(res.outputs.out_file) as f: assert "hi from pydra\n" == f.read() @@ -1912,38 +1444,25 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): """adding state to the input from input_spec""" cmd_exec = "echo" hello = ["HELLO", "hi"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "text", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + mandatory=True, + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split("text", text=hello) + shelly = Shelly().split("text", text=hello) assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res[0].output.stdout == "HELLO\n" - assert res[1].output.stdout == "hi\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout[0] == "HELLO\n" + assert res.outputs.stdout[1] == "hi\n" def test_shell_cmd_inputspec_typeval_1(): @@ -1952,22 +1471,17 @@ def test_shell_cmd_inputspec_typeval_1(): """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - attr.ib( - type=int, - metadata={"position": 1, "argstr": "", "help": "text"}, - ), - ) - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: int = shell.arg( + position=1, + argstr="", + help="text", + ) with pytest.raises(TypeError): - ShellDef(executable=cmd_exec, text="hello", input_spec=my_input_spec) + Shelly() def test_shell_cmd_inputspec_typeval_2(): @@ -1976,14 +1490,14 @@ def test_shell_cmd_inputspec_typeval_2(): """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[("text", int, {"position": 1, "argstr": "", "help": 
"text"})], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + + text: int = shell.arg(position=1, argstr="", help="text") with pytest.raises(TypeError): - ShellDef(executable=cmd_exec, text="hello", input_spec=my_input_spec) + Shelly(text="hello") @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1992,30 +1506,24 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): using shorter syntax for input_spec (without default) """ cmd_exec = "echo" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "text", - str, - {"position": 1, "help": "text", "mandatory": True, "argstr": ""}, - ) - ], - bases=(ShellDef,), - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + text: str = shell.arg( + position=1, + help="text", + mandatory=True, + argstr="", + ) # separate command into exec + args - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(text=["HELLO", "hi"]) + shelly = Shelly().split(text=["HELLO", "hi"]) assert shelly.inputs.executable == cmd_exec - res = results_function(shelly, plugin) - assert res[0].output.stdout == "HELLO\n" - assert res[1].output.stdout == "hi\n" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout[0] == "HELLO\n" + assert res.outputs.stdout[1] == "hi\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2026,31 +1534,19 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): cmd = "touch" args = ["newfile_1.txt", "newfile_2.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(args=args) + shelly = Shelly(executable=cmd).split(args=args) - res = results_function(shelly, plugin) + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert res[i].output.stdout == "" assert res[i].output.out1.fspath.exists() @@ -2070,38 +1566,24 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): cmd_exec = "cat" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "files", - "mandatory": True, - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd_exec + file: File = shell.arg( + position=1, + help="files", + mandatory=True, + argstr="", + ) - shelly = ShellDef( - name="shelly", - executable=cmd_exec, - input_spec=my_input_spec, - cache_dir=tmp_path, - ).split(file=[file_1, file_2]) + shelly = Shelly().split(file=[file_1, file_2]) assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin) - assert res[0].output.stdout == "hello from pydra" - assert res[1].output.stdout == "have a nice one" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout[0] 
== "hello from pydra" + assert res.outputs.stdout[1] == "have a nice one" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2119,51 +1601,35 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path files = [str(file1), str(file2)] cmd = ["sed", "-is", "s/hello/hi/"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "argstr": "", - "help": "orig file", - "mandatory": True, - "copyfile": "copy", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{orig_file}", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + orig_file: File = shell.arg( + position=1, + argstr="", + help="orig file", + mandatory=True, + copyfile="copy", + ) - shelly = ShellDef( - name="shelly", + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + path_template="{orig_file}", + help="output file", + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - cache_dir=tmp_path, ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - res_l = results_function(shelly, plugin) + res_l = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i, res in enumerate(res_l): - assert res.output.stdout == "" - assert res.output.out_file.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.fspath.parent == shelly.output_dir[i] - with open(res.output.out_file) as f: + assert res.outputs.out_file.fspath.parent == shelly.output_dir[i] + with open(res.outputs.out_file) as f: assert f"hi {txt_l[i]}\n" == f.read() # the original file is unchanged with open(files[i]) as f: @@ -2175,480 +1641,321 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path @pytest.mark.flaky(reruns=2) # when dask def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): - """a workflow with input with defined output_file_template (str) + """a workflow with input with defined path_template (str) that requires wf.lzin """ - wf = Workflow(name="wf", input_spec=["cmd", "args"]) - - wf.inputs.cmd = "touch" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - wf.add( - ShellDef( - name="shelly", - input_spec=my_input_spec, - executable=wf.lzin.cmd, - args=wf.lzin.args, + @workflow.define + def Workflow(cmd, args): + + shelly = workflow.add( + Shelly( + executable=cmd, + additional_args=args, + ) ) - ) - wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) + return shelly.out1, shelly.stdout + + wf = Workflow(cmd="touch", args="newfile.txt") with Submitter(worker=plugin_dask_opt) as sub: - wf(submitter=sub) + res = sub(wf) - res = wf.result() - assert res.output.out == "" - assert res.output.out_f.fspath.exists() - assert res.output.out_f.fspath.parent == wf.output_dir + assert res.outputs.out == "" + assert res.outputs.out_f.fspath.exists() + assert 
res.outputs.out_f.fspath.parent == wf.output_dir def test_wf_shell_cmd_2a(plugin, tmp_path): - """a workflow with input with defined output_file_template (tuple) + """a workflow with input with defined path_template (tuple) that requires wf.lzin """ - wf = Workflow(name="wf", input_spec=["cmd", "args"]) - - wf.inputs.cmd = "touch" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out1", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "placeholder" + + class Outputs(ShellOutputs): + out1: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - wf.add( - ShellDef( - name="shelly", - input_spec=my_input_spec, - executable=wf.lzin.cmd, - args=wf.lzin.args, + @workflow.define + def Workflow(cmd, args): + + shelly = workflow.add( + Shelly( + executable=cmd, + additional_args=args, + ) ) - ) - wf.set_output([("out_f", wf.shelly.lzout.out1), ("out", wf.shelly.lzout.stdout)]) + return shelly.out1, shelly.stdout + + wf = Workflow(cmd="touch", args=("newfile.txt",)) with Submitter(worker=plugin) as sub: - wf(submitter=sub) + res = sub(wf) - res = wf.result() - assert res.output.out == "" - assert res.output.out_f.fspath.exists() + assert res.outputs.out == "" + assert res.outputs.out_f.fspath.exists() def test_wf_shell_cmd_3(plugin, tmp_path): """a workflow with 2 tasks, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + class Outputs(ShellOutputs): + file: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=File, - metadata={ - "position": 1, - "help": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + orig_file: File = shell.arg( + position=1, + help="output file", + argstr="", + ) + + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) - wf.add( - ShellDef( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, args): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + additional_args=args, + ) ) - ) - wf.add( - ShellDef( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - 
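The workflow tests in this section wrap shell task definitions with `@workflow.define` and wire them together via `workflow.add`. A minimal sketch of that pattern, assuming `shell`, `workflow`, `Submitter`, `ShellDef`, `ShellOutputs` and `File` are imported as in the rest of this module, and using the explicit `outputs=[...]` naming seen in the converted workflow tests (names are illustrative):

    @shell.define
    class Touch(ShellDef["Touch.Outputs"]):
        executable = "touch"

        class Outputs(ShellOutputs):
            out1: File = shell.outarg(path_template="{args}", help="output file")

    @workflow.define(outputs=["out_file", "out"])
    def TouchWorkflow(cmd, args):
        touch = workflow.add(Touch(executable=cmd, additional_args=args))
        return touch.out1, touch.stdout

    wf = TouchWorkflow(cmd="touch", args="newfile.txt")
    with Submitter(worker="debug") as sub:
        res = sub(wf)
    # res.outputs.out is the empty stdout and res.outputs.out_file points at the
    # file created by the touch task.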
("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow(cmd1="touch", cmd2="cp", args="newfile.txt") with Submitter(worker=plugin) as sub: - wf(submitter=sub) + res = sub(wf) res = wf.result() - assert res.output.out1 == "" - assert res.output.touch_file.fspath.exists() - assert res.output.touch_file.fspath.parent == wf.output_dir - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() - assert res.output.cp_file.fspath.parent == wf.output_dir + assert res.outputs.out1 == "" + assert res.outputs.touch_file.fspath.exists() + assert res.outputs.touch_file.fspath.parent == wf.output_dir + assert res.outputs.out2 == "" + assert res.outputs.cp_file.fspath.exists() + assert res.outputs.cp_file.fspath.parent == wf.output_dir def test_wf_shell_cmd_3a(plugin, tmp_path): """a workflow with 2 tasks, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = "newfile.txt" - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + class Outputs(ShellOutputs): + file: File = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_cp", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", + ) + + class Outputs(ShellOutputs): + out_file: File = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_cp", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, args): - wf.add( - ShellDef( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + additional_args=args, + ) ) - ) - wf.add( - ShellDef( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow(cmd1="touch", cmd2="cp", args="newfile.txt") with Submitter(worker=plugin) as sub: - wf(submitter=sub) + res = sub(wf) res = wf.result() - assert res.output.out1 == "" - assert res.output.touch_file.fspath.exists() - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() + assert res.outputs.out1 == "" + assert 
res.outputs.touch_file.fspath.exists() + assert res.outputs.out2 == "" + assert res.outputs.cp_file.fspath.exists() def test_wf_shell_cmd_state_1(plugin, tmp_path): """a workflow with 2 tasks and splitter on the wf level, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow( - name="wf", input_spec=["cmd1", "cmd2", "args"], cache_dir=tmp_path - ).split("args", args=["newfile_1.txt", "newfile_2.txt"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + class Outputs(ShellOutputs): + file: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", + ) - wf.add( - ShellDef( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - args=wf.lzin.args, + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, args): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + additional_args=args, + ) ) - ) - wf.add( - ShellDef( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow(cmd1="touch", cmd2="cp").split( + args=["newfile_1.txt", "newfile_2.txt"] ) - with Submitter(worker=plugin) as sub: - wf(submitter=sub) + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) - res_l = wf.result() - for i, res in enumerate(res_l): - assert res.output.out1 == "" - assert res.output.touch_file.fspath.exists() - assert res.output.touch_file.fspath.parent == wf.output_dir[i] - assert res.output.out2 == "" - assert res.output.cp_file.fspath.exists() - assert res.output.cp_file.fspath.parent == wf.output_dir[i] + for i in range(2): + assert res.outputs.out1[i] == "" + assert res.outputs.touch_file[i].fspath.exists() + assert res.outputs.touch_file[i].fspath.parent == wf.output_dir[i] + assert res.outputs.out2[i] == "" + assert res.outputs.cp_file[i].fspath.exists() + assert res.outputs.cp_file[i].fspath.parent == wf.output_dir[i] def test_wf_shell_cmd_ndst_1(plugin, tmp_path): """a workflow with 2 tasks and a splitter on the node level, - first one has input with output_file_template (str, uses wf.lzin), + first one has input with 
path_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]) - - wf.inputs.cmd1 = "touch" - wf.inputs.cmd2 = "cp" - wf.inputs.args = ["newfile_1.txt", "newfile_2.txt"] - wf.cache_dir = tmp_path - - my_input_spec1 = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=str, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + + @shell.define + class Shelly1(ShellDef["Shelly1.Outputs"]): + class Outputs(ShellOutputs): + file: str = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellDef,), - ) - my_input_spec2 = SpecInfo( - name="Input", - fields=[ - ( - "orig_file", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "output file", - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "position": 2, - "argstr": "", - "output_file_template": "{orig_file}_copy", - "help": "output file", - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly2(ShellDef["Shelly2.Outputs"]): + orig_file: str = shell.arg( + position=1, + help="output file", + argstr="", + ) - wf.add( - ShellDef( - name="shelly1", - input_spec=my_input_spec1, - executable=wf.lzin.cmd1, - ).split("args", args=wf.lzin.args) - ) - wf.add( - ShellDef( - name="shelly2", - input_spec=my_input_spec2, - executable=wf.lzin.cmd2, - orig_file=wf.shelly1.lzout.file, + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + position=2, + argstr="", + path_template="{orig_file}_copy", + help="output file", + ) + + @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) + def Workflow(cmd1, cmd2, args): + + shelly1 = workflow.add( + Shelly1( + executable=cmd1, + ).split("args", args=args) + ) + shelly2 = workflow.add( + Shelly2( + executable=cmd2, + orig_file=shelly1.file, + ) ) - ) - wf.set_output( - [ - ("touch_file", wf.shelly1.lzout.file), - ("out1", wf.shelly1.lzout.stdout), - ("cp_file", wf.shelly2.lzout.out_file), - ("out2", wf.shelly2.lzout.stdout), - ] - ) + return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout + + wf = Workflow(cmd1="touch", cmd2="cp", args=["newfile_1.txt", "newfile_2.txt"]) with Submitter(worker=plugin) as sub: - wf(submitter=sub) + res = sub(wf) res = wf.result() - assert res.output.out1 == ["", ""] - assert all([file.fspath.exists() for file in res.output.touch_file]) - assert res.output.out2 == ["", ""] - assert all([file.fspath.exists() for file in res.output.cp_file]) + assert res.outputs.out1 == ["", ""] + assert all([file.fspath.exists() for file in res.outputs.touch_file]) + assert res.outputs.out2 == ["", ""] + assert all([file.fspath.exists() for file in res.outputs.cp_file]) # customised output definition @@ -2660,18 +1967,14 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path + Shelly = shell.define( + cmd, outputs=[shell.arg(name="newfile", type=File, default="newfile_tmp.txt")] ) + shelly = Shelly() - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert 
res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2680,18 +1983,20 @@ def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", attr.ib(type=File, default="newfile_tmp.txt"))], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.outarg(default="newfile_tmp.txt") + + shelly = Shelly() + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): @@ -2699,14 +2004,16 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): customised output_spec, adding files to the output, providing specific pathname """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp_.txt")], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = "newfile_tmp_.txt" + + shelly = Shelly() with pytest.raises(Exception) as exinfo: with Submitter(worker=plugin) as sub: @@ -2721,18 +2028,20 @@ def test_shell_cmd_outputspec_2(plugin, results_function, tmp_path): using a wildcard in default """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_*.txt")], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = "newfile_*.txt" + + shelly = Shelly() + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): @@ -2741,14 +2050,16 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): using a wildcard in default """ cmd = ["touch", "newfile_tmp.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_*K.txt")], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = "newfile_*K.txt" + + shelly = Shelly() with pytest.raises(Exception) as excinfo: with Submitter(worker=plugin) as sub: @@ -2763,20 +2074,22 @@ def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): using a wildcard in default, should collect two files """ cmd = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"] - 
my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", MultiOutputFile, "newfile_*.txt")], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) - res = results_function(shelly, plugin) - assert res.output.stdout == "" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: MultiOutputFile = "newfile_*.txt" + + shelly = Shelly() + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) + assert len(res.outputs.newfile) == 2 + assert all([file.fspath.exists() for file in res.outputs.newfile]) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2792,25 +2105,21 @@ def gather_output(field, output_dir): if field.name == "newfile": return list(Path(output_dir).expanduser().glob("newfile*.txt")) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile", - attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), - ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + newfile: MultiOutputFile = shell.outarg(callable=gather_output) + + shelly = Shelly() - res = results_function(shelly, plugin) - assert res.output.stdout == "" + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) + assert len(res.outputs.newfile) == 2 + assert all([file.fspath.exists() for file in res.outputs.newfile]) assert ( shelly.output_names == shelly._generated_output_names @@ -2831,25 +2140,22 @@ def gather_output(executable, output_dir): files = executable[1:] return [Path(output_dir) / file for file in files] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile", - attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), - ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): - res = results_function(shelly, plugin) - assert res.output.stdout == "" + newfile: MultiOutputFile = shell.arg(callable=gather_output) + + shelly = Shelly() + + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert res.outputs.stdout == "" # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.fspath.exists() for file in res.output.newfile]) + assert len(res.outputs.newfile) == 2 + assert all([file.fspath.exists() for file in res.outputs.newfile]) def test_shell_cmd_outputspec_5b_error(): @@ -2864,12 +2170,15 @@ def gather_output(executable, output_dir, ble): files = executable[1:] return [Path(output_dir) / file for file in files] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", attr.ib(type=File, metadata={"callable": gather_output}))], - bases=(ShellOutputs,), - ) - shelly = ShellDef(name="shelly", executable=cmd, output_spec=my_output_spec) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + 
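The output-spec tests above collect files with a `callable` instead of a template. A minimal sketch of that pattern, assuming `shell`, `ShellDef`, `ShellOutputs`, `MultiOutputFile` and `Path` are imported as in the rest of this module (names are illustrative):

    def gather_output(field, output_dir):
        # parameters are matched by name; `field` is the output field being filled
        # and `output_dir` is the task's working directory
        if field.name == "newfile":
            return list(Path(output_dir).expanduser().glob("newfile*.txt"))

    @shell.define
    class Touch(ShellDef["Touch.Outputs"]):
        executable = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"]

        class Outputs(ShellOutputs):
            newfile: MultiOutputFile = shell.outarg(callable=gather_output)

    # res.outputs.newfile collects both created files; a callable may instead accept
    # `executable`, as in test_shell_cmd_outputspec_5a.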
executable = cmd
+
+        class Outputs(ShellOutputs):
+            newfile: File = shell.outarg(callable=gather_output)
+
+    shelly = Shelly()
 
     with pytest.raises(AttributeError, match="ble"):
         shelly()
@@ -2877,99 +2186,89 @@ def gather_output(executable, output_dir, ble):
 @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter])
 def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path):
     """
-    Customised output definition defined as a class,
+    Customised output defined as a class,
     using a static function to collect output files.
     """
 
-    @attr.s(kw_only=True)
-    class MyOutputDef(ShellOutputs):
-        @staticmethod
-        def gather_output(executable, output_dir):
-            files = executable[1:]
-            return [Path(output_dir) / file for file in files]
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
 
-        newfile: MultiOutputFile = attr.ib(metadata={"callable": gather_output})
+        executable = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"]
 
-    shelly = ShellDef(
-        name="shelly",
-        executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"],
-        output_spec=SpecInfo(name="Output", bases=(MyOutputDef,)),
-        cache_dir=tmp_path,
-    )
+        class Outputs(ShellOutputs):
+
+            @staticmethod
+            def gather_output(executable, output_dir):
+                files = executable[1:]
+                return [Path(output_dir) / file for file in files]
+
+            newfile: MultiOutputFile = shell.arg(callable=gather_output)
 
-    res = results_function(shelly, plugin)
-    assert res.output.stdout == ""
+    shelly = Shelly()
+
+    res = results_function(shelly, plugin=plugin, cache_dir=tmp_path)
+    assert res.outputs.stdout == ""
     # newfile is a list
-    assert len(res.output.newfile) == 2
-    assert all([file.exists() for file in res.output.newfile])
+    assert len(res.outputs.newfile) == 2
+    assert all([file.exists() for file in res.outputs.newfile])
 
 
 @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter])
 def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path):
     """
-    providing output name by providing output_file_template
+    providing output name by providing path_template
     (similar to the previous example, but not touching input_spec)
     """
     cmd = "touch"
     args = "newfile_tmp.txt"
 
-    my_output_spec = SpecInfo(
-        name="Output",
-        fields=[
-            (
-                "out1",
-                attr.ib(
-                    type=File,
-                    metadata={
-                        "output_file_template": "{args}",
-                        "help": "output file",
-                    },
-                ),
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
+
+        executable = cmd
+
+        class Outputs(ShellOutputs):
+
+            out1: File = shell.outarg(
+                path_template="{args}",
+                help="output file",
             )
-        ],
-        bases=(ShellOutputs,),
-    )
 
-    shelly = ShellDef(
-        name="shelly",
+    shelly = Shelly(
         executable=cmd,
-        args=args,
-        output_spec=my_output_spec,
-        cache_dir=tmp_path,
+        additional_args=args,
     )
 
-    res = results_function(shelly, plugin)
-    assert res.output.stdout == ""
-    assert res.output.out1.fspath.exists()
+    res = results_function(shelly, plugin=plugin, cache_dir=tmp_path)
+    assert res.outputs.stdout == ""
+    assert res.outputs.out1.fspath.exists()
 
 
 def test_shell_cmd_outputspec_6a():
     """
-    providing output name by providing output_file_template
+    providing output name by providing path_template
     (using shorter syntax)
     """
     cmd = "touch"
     args = "newfile_tmp.txt"
 
-    my_output_spec = SpecInfo(
-        name="Output",
-        fields=[
-            (
-                "out1",
-                File,
-                {"output_file_template": "{args}", "help": "output file"},
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
+
+        executable = cmd
+
+        class Outputs(ShellOutputs):
+
+            out1: File = shell.outarg(
+                path_template="{args}",
+                help="output file",
             )
-        ],
-        
bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", executable=cmd, args=args, output_spec=my_output_spec - ) + shelly = Shelly(additional_args=args) res = shelly() - assert res.output.stdout == "" - assert res.output.out1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2984,67 +2283,38 @@ def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): cmd = "bash" new_files_id = ["1", "2", "3"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "script", - attr.ib( - type=File, - metadata={ - "help": "script file", - "mandatory": True, - "position": 1, - "argstr": "", - }, - ), - ), - ( - "files_id", - attr.ib( - type=MultiInputObj, - metadata={ - "position": 2, - "argstr": "...", - "sep": " ", - "help": "list of name indices", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + script: File = shell.arg( + help="script file", + mandatory=True, + position=1, + argstr="", + ) + files_id: MultiInputObj = shell.arg( + position=2, + argstr="...", + sep=" ", + help="list of name indices", + mandatory=True, + ) + + class Outputs(ShellOutputs): - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "new_files", - attr.ib( - type=MultiOutputFile, - metadata={ - "output_file_template": "file{files_id}.txt", - "help": "output file", - }, - ), + new_files: MultiOutputFile = shell.outarg( + path_template="file{files_id}.txt", + help="output file", ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, + shelly = Shelly( script=file, files_id=new_files_id, ) - res = results_function(shelly, "serial") - assert res.output.stdout == "" - for file in res.output.new_files: + res = results_function(shelly, cache_dir=tmp_path) + assert res.outputs.stdout == "" + for file in res.outputs.new_files: assert file.fspath.exists() @@ -3060,60 +2330,31 @@ def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): cmd = "bash" new_files_id = "1" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "script", - attr.ib( - type=File, - metadata={ - "help": "script file", - "mandatory": True, - "position": 1, - "argstr": "", - }, - ), - ), - ( - "files_id", - attr.ib( - type=MultiInputObj, - metadata={ - "position": 2, - "argstr": "...", - "sep": " ", - "help": "list of name indices", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + script: File = shell.arg( + help="script file", + mandatory=True, + position=1, + argstr="", + ) + files_id: MultiInputObj = shell.arg( + position=2, + argstr="...", + sep=" ", + help="list of name indices", + mandatory=True, + ) + + class Outputs(ShellOutputs): - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "new_files", - attr.ib( - type=MultiOutputFile, - metadata={ - "output_file_template": "file{files_id}.txt", - "help": "output file", - }, - ), + new_files: MultiOutputFile = shell.outarg( + path_template="file{files_id}.txt", + help="output file", ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, + shelly = Shelly( script=file, files_id=new_files_id, ) @@ -3122,8 +2363,8 @@ def 
test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_inputspec_11 # see https://github.com/nipype/pydra/issues/671 res = results_function(shelly, "serial") - assert res.output.stdout == "" - assert res.output.new_files.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.new_files.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -3144,48 +2385,32 @@ def get_file_index(stdout): def get_stderr(stderr): return f"stderr: {stderr}" - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - attr.ib( - type=File, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), - ), - ( - "out_file_index", - attr.ib( - type=int, - metadata={"help": "output file", "callable": get_file_index}, - ), - ), - ( - "stderr_field", - attr.ib( - type=str, - metadata={ - "help": "The standard error output", - "callable": get_stderr, - }, - ), - ), - ], - bases=(ShellOutputs,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - shelly = ShellDef( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path - ).split("args", args=args) + executable = cmd + + class Outputs(ShellOutputs): - results = results_function(shelly, plugin) + out1: File = shell.outarg( + path_template="{args}", + help="output file", + ) + out_file_index: int = shell.arg( + help="output file", + callable=get_file_index, + ) + stderr_field: str = shell.arg( + help="The standard error output", + callable=get_stderr, + ) + + shelly = Shelly().split("additional_args", args=args) + + results = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index, res in enumerate(results): - assert res.output.out_file_index == index + 1 - assert res.output.stderr_field == f"stderr: {res.output.stderr}" + assert res.outputs.out_file_index == index + 1 + assert res.outputs.stderr_field == f"stderr: {res.outputs.stderr}" def test_shell_cmd_outputspec_8b_error(): @@ -3196,19 +2421,16 @@ def test_shell_cmd_outputspec_8b_error(): cmd = "echo" args = ["newfile_1.txt", "newfile_2.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out", - attr.ib(type=int, metadata={"help": "output file", "value": "val"}), - ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef(name="shelly", executable=cmd, output_spec=my_output_spec).split( - "args", args=args - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + out: int = shell.arg(help="output file", value="val") + + shelly = Shelly().split("additional_args", args=args) with pytest.raises(Exception) as e: shelly() assert "has to have a callable" in str(e.value) @@ -3226,32 +2448,21 @@ def get_lowest_directory(directory_path): cmd = "mkdir" args = [f"{tmp_path}/dir1", f"{tmp_path}/dir2"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "resultsDir", - attr.ib( - type=Directory, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + resultsDir: Directory = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - output_spec=my_output_spec, - resultsDir="outdir", - cache_dir=tmp_path, - ).split("args", args=args) + shelly = 
Shelly(resultsDir="outdir").split(additional_args=args) - results_function(shelly, plugin) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @@ -3269,58 +2480,37 @@ def get_lowest_directory(directory_path): cmd = "mkdir" - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "resultsDir", - attr.ib( - type=str, - metadata={ - "position": 1, - "help": "new directory", - "argstr": "", - }, - ), - ) - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + resultsDir: str = shell.arg( + position=1, + help="new directory", + argstr="", + ) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "resultsDir", - attr.ib( - type=Directory, - metadata={ - "output_file_template": "{resultsDir}", - "help": "output file", - }, - ), + executable = cmd + + class Outputs(ShellOutputs): + + resultsDir: Directory = shell.outarg( + path_template="{resultsDir}", + help="output file", ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name=cmd, - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - cache_dir=tmp_path, - resultsDir="test", # Path(tmp_path) / "test" TODO: Not working without absolute path support - ) + shelly = Shelly(resultsDir="test") assert ( shelly.output_names == shelly._generated_output_names == ["return_code", "stdout", "stderr", "resultsDir"] ) - res = results_function(shelly, plugin) + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) print("Cache_dirr:", shelly.cache_dir) assert (shelly.output_dir / Path("test")).exists() - assert get_lowest_directory(res.output.resultsDir) == get_lowest_directory( + assert get_lowest_directory(res.outputs.resultsDir) == get_lowest_directory( shelly.output_dir / Path("test") ) @@ -3328,37 +2518,29 @@ def get_lowest_directory(directory_path): @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): """ - providing output name by providing output_file_template + providing output name by providing path_template splitter for a field that is used in the template """ cmd = "touch" args = ["newfile_1.txt", "newfile_2.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "out1", - attr.ib( - type=File, - metadata={ - "output_file_template": "{args}", - "help": "output file", - }, - ), + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + out1: File = shell.outarg( + path_template="{args}", + help="output file", ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + shelly = Shelly( executable=cmd, - output_spec=my_output_spec, - cache_dir=tmp_path, ).split("args", args=args) - res = results_function(shelly, plugin) + res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert res[i].output.stdout == "" assert res[i].output.out1.fspath.exists() @@ -3374,28 +2556,29 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): """ cmd = ["touch", "newfile_tmp.txt"] - wf = Workflow(name="wf", input_spec=["cmd"]) - wf.inputs.cmd = cmd - wf.cache_dir = tmp_path - - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "newfile_tmp.txt")], - bases=(ShellOutputs,), - ) - 
wf.add(ShellDef(name="shelly", executable=wf.lzin.cmd, output_spec=my_output_spec)) - wf.set_output( - [("stdout", wf.shelly.lzout.stdout), ("newfile", wf.shelly.lzout.newfile)] - ) - with Submitter(worker=plugin) as sub: - wf(submitter=sub) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - res = wf.result() - assert res.output.stdout == "" - assert res.output.newfile.fspath.exists() + executable = cmd + + class Outputs(ShellOutputs): + newfile: File = shell.outarg(default="newfile_tmp.txt") + + @workflow.define(outputs=["stdout", "newfile"]) + def Workflow(cmd): + shelly = workflow.add(Shelly()) + return shelly.stdout, shelly.newfile + + wf = Workflow() + + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + res = sub(wf) + + assert res.outputs.stdout == "" + assert res.outputs.newfile.fspath.exists() # checking if the file was copied to the wf dir - assert res.output.newfile.fspath.parent == wf.output_dir + assert res.outputs.newfile.fspath.parent == wf.output_dir def test_shell_cmd_inputspec_outputspec_1(): @@ -3403,52 +2586,24 @@ def test_shell_cmd_inputspec_outputspec_1(): customised input_spec and output_spec, output_spec uses input_spec fields in templates """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help": "newfile 1"}, - ), - ( - "newfile2", - File, - {"output_file_template": "{file2}", "help": "newfile 2"}, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.file2 = "new_file_2.txt" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + file1: File = shell.arg(help="1st creadted file", argstr="", position=1) + file2: File = shell.arg(help="2nd creadted file", argstr="", position=2) - res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + class Outputs(ShellOutputs): + newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") + newfile2: File = shell.outarg(path_template="{file2}", help="newfile 2") + + executable = cmd + + shelly = Shelly(file1="new_file_1.txt", file2="new_file_2.txt") + + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_1a(): @@ -3457,52 +2612,33 @@ def test_shell_cmd_inputspec_outputspec_1a(): file2 is used in a template for newfile2, but it is not provided, so newfile2 is set to NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help": "newfile 1"}, - ), - ( - "newfile2", - File, - {"output_file_template": "{file2}", "help": "newfile 2"}, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - 
name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + executable = cmd + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") + newfile2: File = shell.outarg(path_template="{file2}", help="newfile 2") + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" + shelly.file1 = "new_file_1.txt" res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() # newfile2 is not created, since file2 is not provided - assert res.output.newfile2 is attr.NOTHING + assert res.outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_2(): @@ -3510,55 +2646,29 @@ def test_shell_cmd_inputspec_outputspec_2(): customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": "", "position": 2}, - ), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - "requires": ["file1"], - }, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help": "newfile 1", - "requires": ["file1", "file2"], - }, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", - executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, - ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.file2 = "new_file_2.txt" + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + requires=["file1"], + ) + newfile2: File = shell.outarg( + path_template="{file2}", + help="newfile 1", + requires=["file1", "file2"], + ) + + shelly = Shelly() + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" # all fields from output_spec should be in output_names and _generated_output_names assert ( shelly.output_names @@ -3567,9 +2677,9 @@ def test_shell_cmd_inputspec_outputspec_2(): ) res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() + assert res.outputs.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_2a(): @@ -3577,54 +2687,30 @@ def test_shell_cmd_inputspec_outputspec_2a(): customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": 
"", "position": 2}, - ), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - "requires": ["file1"], - }, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help": "newfile 1", - "requires": ["file1", "file2"], - }, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + requires=["file1"], + ) + newfile2: File = shell.outarg( + path_template="{file2}", + help="newfile 1", + requires=["file1", "file2"], + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" + shelly.file1 = "new_file_1.txt" # _generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", @@ -3641,9 +2727,9 @@ def test_shell_cmd_inputspec_outputspec_2a(): ] res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2 is attr.NOTHING + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() + assert res.outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_3(): @@ -3652,58 +2738,34 @@ def test_shell_cmd_inputspec_outputspec_3(): adding one additional input that is not in the template, but in the requires field, """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": "", "position": 2}, - ), - ("additional_inp", int, {"help": "additional inp"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help": "newfile 1"}, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help": "newfile 1", - "requires": ["file1", "additional_inp"], - }, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + additional_inp: int = shell.arg(help="additional inp") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") + newfile2: File = shell.outarg( + path_template="{file2}", + help="newfile 1", + requires=["file1", "additional_inp"], + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.file2 = "new_file_2.txt" - shelly.definition.additional_inp = 2 + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" + shelly.additional_inp = 2 res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() - assert res.output.newfile2.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() + assert 
res.outputs.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_3a(): @@ -3713,52 +2775,28 @@ def test_shell_cmd_inputspec_outputspec_3a(): the additional input not provided, so the output is NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ( - "file2", - str, - {"help": "2nd creadted file", "argstr": "", "position": 2}, - ), - ("additional_inp", str, {"help": "additional inp"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - {"output_file_template": "{file1}", "help": "newfile 1"}, - ), - ( - "newfile2", - File, - { - "output_file_template": "{file2}", - "help": "newfile 1", - "requires": ["file1", "additional_inp"], - }, - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + additional_inp: str = shell.arg(help="additional inp") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") + newfile2: File = shell.outarg( + path_template="{file2}", + help="newfile 1", + requires=["file1", "additional_inp"], + ) + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.file2 = "new_file_2.txt" + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" # _generated_output_names should know that newfile2 will not be generated assert shelly.output_names == [ "return_code", @@ -3775,10 +2813,10 @@ def test_shell_cmd_inputspec_outputspec_3a(): ] res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() # additional input not provided so no newfile2 set (even if the file was created) - assert res.output.newfile2 is attr.NOTHING + assert res.outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_4(): @@ -3787,42 +2825,26 @@ def test_shell_cmd_inputspec_outputspec_4(): adding one additional input to the requires together with a list of the allowed values, """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp", int, {"help": "additional inp"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - "requires": ["file1", ("additional_inp", [2, 3])], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp: int = shell.arg(help="additional inp") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + requires=["file1", ("additional_inp", [2, 3])], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.additional_inp = 2 + 
shelly.file1 = "new_file_1.txt" + shelly.additional_inp = 2 # _generated_output_names should be the same as output_names assert ( shelly.output_names @@ -3831,8 +2853,8 @@ def test_shell_cmd_inputspec_outputspec_4(): ) res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_4a(): @@ -3842,47 +2864,31 @@ def test_shell_cmd_inputspec_outputspec_4a(): the input is set to a value that is not in the list, so output is NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp", int, {"help": "additional inp"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - "requires": ["file1", ("additional_inp", [2, 3])], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp: int = shell.arg(help="additional inp") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + requires=["file1", ("additional_inp", [2, 3])], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" + shelly.file1 = "new_file_1.txt" # the value is not in the list from requires - shelly.definition.additional_inp = 1 + shelly.additional_inp = 1 res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1 is attr.NOTHING + assert res.outputs.stdout == "" + assert res.outputs.newfile1 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_5(): @@ -3892,51 +2898,35 @@ def test_shell_cmd_inputspec_outputspec_5(): the firs element of the requires list has all the fields set """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", int, {"help": "additional inp A"}), - ("additional_inp_B", str, {"help": "additional inp B"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... - "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: int = shell.arg(help="additional inp A") + additional_inp_B: str = shell.arg(help="additional inp B") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... 
+ requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.additional_inp_A = 2 + shelly.file1 = "new_file_1.txt" + shelly.additional_inp_A = 2 res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5a(): @@ -3946,51 +2936,35 @@ def test_shell_cmd_inputspec_outputspec_5a(): the second element of the requires list (i.e. additional_inp_B) has all the fields set """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help": "additional inp A"}), - ("additional_inp_B", int, {"help": "additional inp B"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... - "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str = shell.arg(help="additional inp A") + additional_inp_B: int = shell.arg(help="additional inp B") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... + requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" - shelly.definition.additional_inp_B = 2 + shelly.file1 = "new_file_1.txt" + shelly.additional_inp_B = 2 res = shelly() - assert res.output.stdout == "" - assert res.output.newfile1.fspath.exists() + assert res.outputs.stdout == "" + assert res.outputs.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5b(): @@ -4000,51 +2974,35 @@ def test_shell_cmd_inputspec_outputspec_5b(): neither of the list from requirements has all the fields set, so the output is NOTHING """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help": "additional inp A"}), - ("additional_inp_B", str, {"help": "additional inp B"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - # requires is a list of list so it's treated as el[0] OR el[1] OR... 
- "requires": [ - ["file1", "additional_inp_A"], - ["file1", "additional_inp_B"], - ], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str = shell.arg(help="additional inp A") + additional_inp_B: str = shell.arg(help="additional inp B") + + class Outputs(ShellOutputs): + + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + # requires is a list of list so it's treated as el[0] OR el[1] OR... + requires=[ + ["file1", "additional_inp_A"], + ["file1", "additional_inp_B"], + ], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" + shelly.file1 = "new_file_1.txt" res = shelly() - assert res.output.stdout == "" + assert res.outputs.stdout == "" # neither additional_inp_A nor additional_inp_B is set, so newfile1 is NOTHING - assert res.output.newfile1 is attr.NOTHING + assert res.outputs.newfile1 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_6_except(): @@ -4053,42 +3011,25 @@ def test_shell_cmd_inputspec_outputspec_6_except(): requires has invalid syntax - exception is raised """ cmd = ["touch", "newfile_tmp.txt"] - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file1", - str, - {"help": "1st creadted file", "argstr": "", "position": 1}, - ), - ("additional_inp_A", str, {"help": "additional inp A"}), - ], - bases=(ShellDef,), - ) - my_output_spec = SpecInfo( - name="Output", - fields=[ - ( - "newfile1", - File, - { - "output_file_template": "{file1}", - "help": "newfile 1", - # requires has invalid syntax - "requires": [["file1", "additional_inp_A"], "file1"], - }, + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = cmd + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str = shell.arg(help="additional inp A") + + class Outputs(ShellOutputs): + newfile1: File = shell.outarg( + path_template="{file1}", + help="newfile 1", + # requires has invalid syntax + requires=[["file1", "additional_inp_A"], "file1"], ) - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - name="shelly", + + shelly = Shelly( executable=cmd, - input_spec=my_input_spec, - output_spec=my_output_spec, ) - shelly.definition.file1 = "new_file_1.txt" + shelly.file1 = "new_file_1.txt" with pytest.raises(Exception, match="requires field can be"): shelly() @@ -4117,223 +3058,113 @@ def change_name(file): name, ext = os.path.splitext(file) return f"{name}_brain.{ext}" - bet_input_spec = SpecInfo( - name="Input", - # TODO: change the position?? 
- fields=[ - ( - "in_file", - attr.ib( - type=File, - metadata={ - "help": "input file to skull strip", - "position": 1, - "mandatory": True, - "argstr": "", - }, - ), - ), - ( - "out_file", - attr.ib( - type=str, - metadata={ - "help": "name of output skull stripped image", - "position": 2, - "argstr": "", - "output_file_template": "{in_file}_brain", - }, - ), - ), - ( - "outline", - attr.ib( - type=bool, - metadata={ - "help": "create surface outline image", - "argstr": "-o", - }, - ), - ), - ( - "mask", - attr.ib( - type=bool, - metadata={ - "help": "create binary mask image", - "argstr": "-m", - }, - ), - ), - ( - "skull", - attr.ib( - type=bool, - metadata={"help": "create skull image", "argstr": "-s"}, - ), - ), - ( - "no_output", - attr.ib( - type=bool, - metadata={ - "help": "Don't generate segmented output", - "argstr": "-n", - }, - ), - ), - ( - "frac", - attr.ib( - type=float, - metadata={ - "help": "fractional intensity threshold", - "argstr": "-f", - }, - ), - ), - ( - "vertical_gradient", - attr.ib( - type=float, - metadata={ - "help": "vertical gradient in fractional intensity threshold (-1, 1)", - "argstr": "-g", - "allowed_values": {"min_val": -1, "max_val": 1}, - }, - ), - ), - ( - "radius", - attr.ib(type=int, metadata={"argstr": "-r", "help": "head radius"}), - ), - ( - "center", - attr.ib( - type=ty.List[int], - metadata={ - "help": "center of gravity in voxels", - "argstr": "-c", - "allowed_values": {"min_value": 0, "max_value": 3}, - }, - ), - ), - ( - "threshold", - attr.ib( - type=bool, - metadata={ - "argstr": "-t", - "help": "apply thresholding to segmented brain image and mask", - }, - ), - ), - ( - "mesh", - attr.ib( - type=bool, - metadata={ - "argstr": "-e", - "help": "generate a vtk mesh brain surface", - }, - ), - ), - ( - "robust", - attr.ib( - type=bool, - metadata={ - "help": "robust brain centre estimation (iterates BET several times)", - "argstr": "-R", - "xor": _xor_inputs, - }, - ), - ), - ( - "padding", - attr.ib( - type=bool, - metadata={ - "help": "improve BET if FOV is very small in Z (by temporarily padding end slices", - "argstr": "-Z", - "xor": _xor_inputs, - }, - ), - ), - ( - "remove_eyes", - attr.ib( - type=bool, - metadata={ - "help": "eye & optic nerve cleanup (can be useful in SIENA)", - "argstr": "-S", - "xor": _xor_inputs, - }, - ), - ), - ( - "surfaces", - attr.ib( - type=bool, - metadata={ - "help": "run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", - "argstr": "-A", - "xor": _xor_inputs, - }, - ), - ), - ( - "t2_guided", - attr.ib( - type=ty.Union[File, str], - metadata={ - "help": "as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", - "argstr": "-A2", - "xor": _xor_inputs, - }, - ), - ), - ( - "functional", - attr.ib( - type=bool, - metadata={ - "argstr": "-F", - "xor": _xor_inputs, - "help": "apply to 4D fMRI data", - }, - ), - ), - ( - "reduce_bias", - attr.ib( - type=bool, - metadata={ - "argstr": "-B", - "xor": _xor_inputs, - "help": "bias field and neck cleanup", - }, - ), - ), - # ("number_classes", int, attr.ib(metadata={"help": 'number of tissue-type classes', "argstr": '-n', - # "allowed_values": {"min_val": 1, "max_val": 10}})), - # ("output_biasfield", bool, - # attr.ib(metadata={"help": 'output estimated bias field', "argstr": '-b'})), - # ("output_biascorrected", bool, - # attr.ib(metadata={"help": 'output restored image (bias-corrected image)', "argstr": '-B'})), - ], - bases=(ShellDef,), - ) + @shell.define + class 
Shelly(ShellDef["Shelly.Outputs"]): + executable = "bet" + in_file: File = shell.arg( + help="input file to skull strip", + position=1, + mandatory=True, + argstr="", + ) + + outline: bool = shell.arg( + help="create surface outline image", + argstr="-o", + ) + mask: bool = shell.arg( + help="create binary mask image", + argstr="-m", + ) + skull: bool = shell.arg( + help="create skull image", + argstr="-s", + ) + no_output: bool = shell.arg( + help="Don't generate segmented output", + argstr="-n", + ) + frac: float = shell.arg( + help="fractional intensity threshold", + argstr="-f", + ) + vertical_gradient: float = shell.arg( + help="vertical gradient in fractional intensity threshold (-1, 1)", + argstr="-g", + allowed_values={"min_val": -1, "max_val": 1}, + ) + radius: int = shell.arg(argstr="-r", help="head radius") + center: ty.List[int] = shell.arg( + help="center of gravity in voxels", + argstr="-c", + allowed_values={"min_value": 0, "max_value": 3}, + ) + threshold: bool = shell.arg( + argstr="-t", + help="apply thresholding to segmented brain image and mask", + ) + mesh: bool = shell.arg( + argstr="-e", + help="generate a vtk mesh brain surface", + ) + robust: bool = shell.arg( + help="robust brain centre estimation (iterates BET several times)", + argstr="-R", + xor=_xor_inputs, + ) + padding: bool = shell.arg( + help="improve BET if FOV is very small in Z (by temporarily padding end slices", + argstr="-Z", + xor=_xor_inputs, + ) + remove_eyes: bool = shell.arg( + help="eye & optic nerve cleanup (can be useful in SIENA)", + argstr="-S", + xor=_xor_inputs, + ) + surfaces: bool = shell.arg( + help="run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", + argstr="-A", + xor=_xor_inputs, + ) + t2_guided: ty.Union[File, str] = shell.arg( + help="as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", + argstr="-A2", + xor=_xor_inputs, + ) + functional: bool = shell.arg( + argstr="-F", + xor=_xor_inputs, + help="apply to 4D fMRI data", + ) + reduce_bias: bool = shell.arg( + argstr="-B", + xor=_xor_inputs, + help="bias field and neck cleanup", + ) + + class Outputs(ShellOutputs): + out_file: str = shell.outarg( + help="name of output skull stripped image", + position=2, + argstr="", + path_template="{in_file}_brain", + ) + + # ("number_classes", int, attr.ib(metadata={help='number of tissue-type classes', argstr='-n', + # allowed_values={"min_val": 1, max_val=10}})), + # ("output_biasfield", bool, + # attr.ib(metadata={help='output estimated bias field', argstr='-b'})), + # ("output_biascorrected", bool, + # attr.ib(metadata={help='output restored image (bias-corrected image)', argstr='-B'})), # TODO: not sure why this has to be string in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args - shelly = ShellDef( - name="bet_task", executable="bet", in_file=in_file, input_spec=bet_input_spec - ) + shelly = Shelly(in_file=in_file) out_file = shelly.output_dir / "test_brain.nii.gz" - assert shelly.definition.executable == "bet" + assert shelly.executable == "bet" assert shelly.cmdline == f"bet {in_file} {out_file}" # res = shelly(plugin="cf") @@ -4342,45 +3173,27 @@ def test_shell_cmd_optional_output_file1(tmp_path): """ Test to see that 'unused' doesn't complain about not having an output passed to it """ - my_cp_spec = SpecInfo( - name="Input", - fields=[ - ( - "input", - attr.ib(type=File, metadata={"argstr": "", "help": "input file"}), - ), - ( - "output", - attr.ib( - type=Path, - 
metadata={
-                        "argstr": "",
-                        "output_file_template": "out.txt",
-                        "help": "output file",
-                    },
-                ),
-            ),
-            (
-                "unused",
-                attr.ib(
-                    type=ty.Union[Path, bool],
-                    default=False,
-                    metadata={
-                        "argstr": "--not-used",
-                        "output_file_template": "out.txt",
-                        "help": "dummy output",
-                    },
-                ),
-            ),
-        ],
-        bases=(ShellDef,),
-    )
-    my_cp = ShellDef(
-        name="my_cp",
-        executable="cp",
-        input_spec=my_cp_spec,
-    )
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
+        input: File = shell.arg(argstr="", help="input file")
+
+        executable = "cp"
+
+        class Outputs(ShellOutputs):
+            output: File = shell.outarg(
+                argstr="",
+                path_template="out.txt",
+                help="output file",
+            )
+            unused: File | None = shell.outarg(
+                default=False,
+                argstr="--not-used",
+                path_template="out.txt",
+                help="dummy output",
+            )
+
+    my_cp = Shelly()
     file1 = tmp_path / "file1.txt"
     file1.write_text("foo")
     outputs = my_cp(input=file1, unused=False)
@@ -4391,34 +3204,21 @@ def test_shell_cmd_optional_output_file2(tmp_path):
     """
     Test to see that 'unused' doesn't complain about not having an output passed to it
     """
-    my_cp_spec = SpecInfo(
-        name="Input",
-        fields=[
-            (
-                "input",
-                attr.ib(type=File, metadata={"argstr": "", "help": "input file"}),
-            ),
-            (
-                "output",
-                attr.ib(
-                    type=ty.Union[Path, bool],
-                    default=False,
-                    metadata={
-                        "argstr": "",
-                        "output_file_template": "out.txt",
-                        "help": "dummy output",
-                    },
-                ),
-            ),
-        ],
-        bases=(ShellDef,),
-    )
-    my_cp = ShellDef(
-        name="my_cp",
-        executable="cp",
-        input_spec=my_cp_spec,
-    )
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
+        executable = "cp"
+
+        input: File = shell.arg(argstr="", help="input file")
+
+        class Outputs(ShellOutputs):
+            output: File = shell.outarg(
+                argstr="",
+                path_template="out.txt",
+                help="dummy output",
+            )
+
+    my_cp = Shelly()
     file1 = tmp_path / "file1.txt"
     file1.write_text("foo")
     outputs = my_cp(input=file1, output=True)
@@ -4433,253 +3233,145 @@ def test_shell_cmd_optional_output_file2(tmp_path):
 
 
 def test_shell_cmd_non_existing_outputs_1(tmp_path):
     """Checking that non existing output files do not return a phantom path,
     but return NOTHING instead"""
-    input_spec = SpecInfo(
-        name="Input",
-        fields=[
-            (
-                "out_name",
-                attr.ib(
-                    type=str,
-                    metadata={
-                        "help": """
-            base name of the pretend outputs.
-            """,
-                        "mandatory": True,
-                    },
-                ),
+
+    @shell.define
+    class Shelly(ShellDef["Shelly.Outputs"]):
+
+        out_name: str = shell.arg(
+            help="""
+            base name of the pretend outputs.
+ """, + mandatory=True, + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="fictional output #1", + path_template="{out_name}_1.nii", + ) + out_2: File = shell.outarg( + help="fictional output #2", + path_template="{out_name}_2.nii", ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, + shelly = Shelly( executable="echo", - input_spec=input_spec, - output_spec=out_spec, out_name="test", ) shelly() res = shelly.result() - assert res.output.out_1 == attr.NOTHING and res.output.out_2 == attr.NOTHING + assert res.outputs.out_1 == attr.NOTHING and res.outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_2(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has one existing and one non existing output file. """ - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help": """ - base name of the pretend outputs. - """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + out_name: str = shell.arg( + help=""" + base name of the pretend outputs. + """, + mandatory=True, + argstr="{out_name}_1.nii", + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="fictional output #1", + path_template="{out_name}_1.nii", + ) + out_2: File = shell.outarg( + help="fictional output #2", + path_template="{out_name}_2.nii", ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, + shelly = Shelly( executable="touch", - input_spec=input_spec, - output_spec=out_spec, out_name="test", ) shelly() res = shelly.result() # the first output file is created - assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.fspath.exists() + assert res.outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert res.outputs.out_1.fspath.exists() # the second output file is not created - assert res.output.out_2 == attr.NOTHING + assert res.outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_3(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has an existing mandatory output and another non existing output file. """ - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help": """ - base name of the pretend outputs. - """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + out_name: str = shell.arg( + help=""" + base name of the pretend outputs. 
+ """, + mandatory=True, + argstr="{out_name}_1.nii", + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="fictional output #1", + path_template="{out_name}_1.nii", + mandatory=True, + ) + out_2: File = shell.outarg( + help="fictional output #2", + path_template="{out_name}_2.nii", ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - "mandatory": True, - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, + shelly = Shelly( executable="touch", - input_spec=input_spec, - output_spec=out_spec, out_name="test", ) shelly() res = shelly.result() # the first output file is created - assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.fspath.exists() + assert res.outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert res.outputs.out_1.fspath.exists() # the second output file is not created - assert res.output.out_2 == attr.NOTHING + assert res.outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_4(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has an existing mandatory output and another non existing mandatory output file.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=str, - metadata={ - "help": """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + out_name: str = shell.arg( + help=""" base name of the pretend outputs. """, - "mandatory": True, - "argstr": "{out_name}_1.nii", - }, - ), + mandatory=True, + argstr="{out_name}_1.nii", + ) + + class Outputs(ShellOutputs): + out_1: File = shell.outarg( + help="fictional output #1", + path_template="{out_name}_1.nii", + mandatory=True, + ) + out_2: File = shell.outarg( + help="fictional output #2", + path_template="{out_name}_2.nii", + mandatory=True, ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_1", - attr.ib( - type=File, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}_1.nii", - "mandatory": True, - }, - ), - ), - ( - "out_2", - attr.ib( - type=File, - metadata={ - "help": "fictional output #2", - "output_file_template": "{out_name}_2.nii", - "mandatory": True, - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, + shelly = Shelly( executable="touch", - input_spec=input_spec, - output_spec=out_spec, out_name="test", ) # An exception should be raised because the second mandatory output does not exist @@ -4692,108 +3384,65 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): """This test looks if non existing files of an multiOuputFile are also set to NOTHING""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=MultiInputObj, - metadata={ - "help": """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "echo" + out_name: MultiInputObj = shell.arg( + help=""" base name of the pretend outputs. 
""", - "mandatory": True, - "argstr": "...", - }, - ), + mandatory=True, + argstr="...", + ) + + class Outputs(ShellOutputs): + out_list: MultiOutputFile = shell.outarg( + help="fictional output #1", + path_template="{out_name}", ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib( - type=MultiOutputFile, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}", - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, - executable="echo", - input_spec=input_spec, - output_spec=out_spec, + shelly = Shelly( out_name=["test_1.nii", "test_2.nii"], ) shelly() res = shelly.result() # checking if the outputs are Nothing - assert res.output.out_list[0] == attr.NOTHING - assert res.output.out_list[1] == attr.NOTHING + assert res.outputs.out_list[0] == attr.NOTHING + assert res.outputs.out_list[1] == attr.NOTHING def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): """This test looks if non existing files of an multiOutputFile are also set to NOTHING. It checks that it also works if one file of the multiOutputFile actually exists.""" - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "out_name", - attr.ib( - type=MultiInputObj, - metadata={ - "help": """ + + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + + out_name: MultiInputObj = shell.arg( + help=""" base name of the pretend outputs. """, - "sep": " test_1_real.nii", # hacky way of creating an extra file with that name - "mandatory": True, - "argstr": "...", - }, - ), + sep=" test_1_real.nii", # hacky way of creating an extra file with that name + mandatory=True, + argstr="...", + ) + + class Outputs(ShellOutputs): + out_list: MultiOutputFile = shell.outarg( + help="fictional output #1", + path_template="{out_name}_real.nii", ) - ], - bases=(ShellDef,), - ) - out_spec = SpecInfo( - name="Output", - fields=[ - ( - "out_list", - attr.ib( - type=MultiOutputFile, - metadata={ - "help": "fictional output #1", - "output_file_template": "{out_name}_real.nii", - }, - ), - ), - ], - bases=(ShellOutputs,), - ) - shelly = ShellDef( - cache_dir=tmp_path, + shelly = Shelly( executable="touch", - input_spec=input_spec, - output_spec=out_spec, out_name=["test_1", "test_2"], ) shelly() res = shelly.result() # checking if the outputs are Nothing - assert res.output.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") - assert res.output.out_list[1] == attr.NOTHING + assert res.outputs.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") + assert res.outputs.out_list[1] == attr.NOTHING @pytest.mark.xfail( @@ -4805,63 +3454,40 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): def test_shellspec_formatter_1(tmp_path): """test the input callable 'formatter'.""" - def spec_info(formatter): - return SpecInfo( - name="Input", - fields=[ - ( - "in1", - attr.ib( - type=str, - metadata={ - "help": """ - just a dummy name - """, - "mandatory": True, - }, - ), - ), - ( - "in2", - attr.ib( - type=str, - metadata={ - "help": """ - just a dummy name - """, - "mandatory": True, - }, - ), - ), - ( - "together", - attr.ib( - type=ty.List, - metadata={ - "help": """ - combines in1 and in2 into a list - """, - # When providing a formatter all other metadata options are discarded. 
- "formatter": formatter, - }, - ), - ), - ], - bases=(ShellDef,), - ) - def formatter_1(inputs): print("FORMATTER:", inputs) return f"-t [{inputs['in1']}, {inputs['in2']}]" - input_spec = spec_info(formatter_1) - shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + def make_shelly(formatter): + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "exec" + in1: str = shell.arg( + help=""" + just a dummy name + """, + mandatory=True, + ) + in2: str = shell.arg( + help=""" + just a dummy name + """, + mandatory=True, + ) + together: ty.List = shell.arg( + help=""" + combines in1 and in2 into a list + """, + # When providing a formatter all other metadata options are discarded. + formatter=formatter, + ) + + Shelly = make_shelly(formatter=formatter_1) + shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" # testing that the formatter can overwrite a provided value for together. - shelly = ShellDef( - executable="exec", - input_spec=input_spec, + shelly = Shelly( in1="i1", in2="i2", together=[1], @@ -4873,18 +3499,18 @@ def formatter_2(in1, in2): print("FORMATTER:", in1, in2) return f"-t [{in1}, {in2}]" - input_spec = spec_info(formatter_2) + Shelly = make_shelly(formatter_2) - shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" def formatter_3(in1, in3): print("FORMATTER:", in1, in3) return f"-t [{in1}, {in3}]" - input_spec = spec_info(formatter_3) + Shelly = make_shelly(formatter_3) - shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = Shelly(in1="i1", in2="i2") with pytest.raises(Exception) as excinfo: shelly.cmdline assert ( @@ -4898,11 +3524,9 @@ def formatter_5(field): # formatter must return a string return field - input_spec = spec_info(formatter_5) + Shelly = make_shelly(formatter_5) - shelly = ShellDef( - executable="exec", - input_spec=input_spec, + shelly = Shelly( in1="i1", in2="i2", # together="-t test", @@ -4915,63 +3539,38 @@ def formatter_4(field): # formatter must return a string return "" - input_spec = spec_info(formatter_4) + Shelly = make_shelly(formatter_4) - shelly = ShellDef(executable="exec", input_spec=input_spec, in1="i1", in2="i2") + shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec" def test_shellspec_formatter_splitter_2(tmp_path): """test the input callable 'formatter' when a splitter is used on an argument of the formatter.""" - def spec_info(formatter): - return SpecInfo( - name="Input", - fields=[ - ( - "in1", - attr.ib( - type=str, - metadata={ - "help": "in1", - }, - ), - ), - ( - "in2", - attr.ib( - type=str, - metadata={ - "help": "in2", - }, - ), - ), - ( - "together", - attr.ib( - type=ty.List, - metadata={ - "help": """ - uses in1 - """, - # When providing a formatter all other metadata options are discarded. - "formatter": formatter, - }, - ), - ), - ], - bases=(ShellDef,), - ) - # asking for specific inputs def formatter_1(in1, in2): return f"-t [{in1} {in2}]" - input_spec = spec_info(formatter_1) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "executable" + in1: str = shell.arg( + help="in1", + ) + in2: str = shell.arg( + help="in2", + ) + together: ty.List = shell.arg( + help=""" + uses in1 + """, + # When providing a formatter all other metadata options are discarded. 
+ formatter=formatter_1, + ) + in1 = ["in11", "in12"] - shelly = ShellDef( - name="f", executable="executable", input_spec=input_spec, in2="in2" - ).split("in1", in1=in1) + shelly = Shelly(in2="in2").split("in1", in1=in1) assert shelly is not None # results = shelly.cmdline @@ -5004,21 +3603,14 @@ def test_shellcommand_error_msg(tmp_path): ), ) - input_spec = SpecInfo( - name="Input", - fields=[ - ( - "in1", - str, - {"help": "a dummy string", "argstr": "", "mandatory": True}, - ), - ], - bases=(ShellDef,), - ) + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): - shelly = ShellDef( - name="err_msg", executable=str(script_path), input_spec=input_spec, in1="hello" - ) + executable = script_path + + in1: str = shell.arg(help="a dummy string", argstr="", mandatory=True) + + shelly = Shelly(in1="hello") with pytest.raises(RuntimeError) as excinfo: shelly() From 753f0afcac1bad3daf57c328568ae73256a477cd Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 10:44:19 +1100 Subject: [PATCH 296/342] removing shelly.result\(\) --- pydra/engine/tests/test_shelltask.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 5579298120..fb69cf1dec 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1766,7 +1766,6 @@ def Workflow(cmd1, cmd2, args): with Submitter(worker=plugin) as sub: res = sub(wf) - res = wf.result() assert res.outputs.out1 == "" assert res.outputs.touch_file.fspath.exists() assert res.outputs.touch_file.fspath.parent == wf.output_dir @@ -1828,7 +1827,6 @@ def Workflow(cmd1, cmd2, args): with Submitter(worker=plugin) as sub: res = sub(wf) - res = wf.result() assert res.outputs.out1 == "" assert res.outputs.touch_file.fspath.exists() assert res.outputs.out2 == "" @@ -1951,7 +1949,6 @@ def Workflow(cmd1, cmd2, args): with Submitter(worker=plugin) as sub: res = sub(wf) - res = wf.result() assert res.outputs.out1 == ["", ""] assert all([file.fspath.exists() for file in res.outputs.touch_file]) assert res.outputs.out2 == ["", ""] @@ -3258,9 +3255,8 @@ class Outputs(ShellOutputs): executable="echo", out_name="test", ) - shelly() - res = shelly.result() - assert res.outputs.out_1 == attr.NOTHING and res.outputs.out_2 == attr.NOTHING + outputs = shelly() + assert outputs.out_1 == attr.NOTHING and outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_2(tmp_path): @@ -3293,13 +3289,12 @@ class Outputs(ShellOutputs): executable="touch", out_name="test", ) - shelly() - res = shelly.result() + outputs = shelly() # the first output file is created - assert res.outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.outputs.out_1.fspath.exists() + assert outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert outputs.out_1.fspath.exists() # the second output file is not created - assert res.outputs.out_2 == attr.NOTHING + assert outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_3(tmp_path): From 0dba80b25d8faf8c71ea754675432075d06b9e65 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 10:55:22 +1100 Subject: [PATCH 297/342] more cleaning of test_shelltask --- pydra/engine/tests/test_shelltask.py | 517 +++++++++++++-------------- 1 file changed, 253 insertions(+), 264 deletions(-) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index fb69cf1dec..19663ba42a 100644 --- 
a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -34,10 +34,10 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): shelly = shell.define(cmd)() assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin=plugin_dask_opt, cache_dir=tmp_path) - assert Path(res.outputs.stdout.rstrip()).parent == tmp_path - assert res.outputs.return_code == 0 - assert res.outputs.stderr == "" + outputs = results_function(shelly, worker=plugin_dask_opt, cache_dir=tmp_path) + assert Path(outputs.stdout.rstrip()).parent == tmp_path + assert outputs.return_code == 0 + assert outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -50,10 +50,10 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert Path(res.outputs.stdout) == Path(shelly.output_dir) - assert res.outputs.return_code == 0 - assert res.outputs.stderr == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert Path(outputs.stdout) == Path(shelly.output_dir) + assert outputs.return_code == 0 + assert outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -64,10 +64,10 @@ def test_shell_cmd_2(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout.strip() == " ".join(cmd[1:]) - assert res.outputs.return_code == 0 - assert res.outputs.stderr == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout.strip() == " ".join(cmd[1:]) + assert outputs.return_code == 0 + assert outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -81,10 +81,10 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout.strip() == " ".join(cmd_args) - assert res.outputs.return_code == 0 - assert res.outputs.stderr == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout.strip() == " ".join(cmd_args) + assert outputs.return_code == 0 + assert outputs.stderr == "" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -98,10 +98,10 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "pydra\n" - assert res.outputs.return_code == 0 - assert res.outputs.stderr == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "pydra\n" + assert outputs.return_code == 0 + assert outputs.stderr == "" # tests with State @@ -118,15 +118,15 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): shelly = shell.define("placeholder")().split("executable", executable=cmd) # assert shelly.cmdline == ["pwd", "whoami"] - res = shelly(plugin=plugin_dask_opt) - assert Path(res.outputs.stdout[0].rstrip()) == shelly.output_dir[0] + outputs = shelly(worker=plugin_dask_opt) + assert Path(outputs.stdout[0].rstrip()) == shelly.output_dir[0] if "USER" in os.environ: - assert res.outputs.stdout[1] == 
f"{os.environ['USER']}\n" + assert outputs.stdout[1] == f"{os.environ['USER']}\n" else: - assert res.outputs.stdout[1] - assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 - assert res.outputs.stderr[0] == res.outputs.stderr[1] == "" + assert outputs.stdout[1] + assert outputs.return_code[0] == outputs.return_code[1] == 0 + assert outputs.stderr[0] == outputs.stderr[1] == "" def test_shell_cmd_4(plugin, tmp_path): @@ -141,13 +141,13 @@ def test_shell_cmd_4(plugin, tmp_path): assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] # assert shelly.cmdline == ["echo nipype", "echo pydra"] - res = shelly(plugin=plugin) + outputs = shelly(worker=plugin) - assert res.outputs.stdout[0] == "nipype\n" - assert res.outputs.stdout[1] == "pydra\n" + assert outputs.stdout[0] == "nipype\n" + assert outputs.stdout[1] == "pydra\n" - assert res.outputs.return_code[0] == res.outputs.return_code[1] == 0 - assert res.outputs.stderr[0] == res.outputs.stderr[1] == "" + assert outputs.return_code[0] == outputs.return_code[1] == 0 + assert outputs.stderr[0] == outputs.stderr[1] == "" def test_shell_cmd_5(plugin, tmp_path): @@ -164,10 +164,10 @@ def test_shell_cmd_5(plugin, tmp_path): assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] # assert shelly.cmdline == ["echo nipype", "echo pydra"] - res = shelly(plugin=plugin) + outputs = shelly(worker=plugin) - assert res.outputs.stdout[0] == "nipype\n" - assert res.outputs.stdout[1] == "pydra\n" + assert outputs.stdout[0] == "nipype\n" + assert outputs.stdout[1] == "pydra\n" def test_shell_cmd_6(plugin, tmp_path): @@ -189,25 +189,25 @@ def test_shell_cmd_6(plugin, tmp_path): # "echo -n nipype", # "echo -n pydra", # ] - res = shelly(plugin=plugin) + outputs = shelly(worker=plugin) - assert res.outputs.stdout[0] == "nipype\n" - assert res.outputs.stdout[1] == "pydra\n" - assert res.outputs.stdout[2] == "nipype" - assert res.outputs.stdout[3] == "pydra" + assert outputs.stdout[0] == "nipype\n" + assert outputs.stdout[1] == "pydra\n" + assert outputs.stdout[2] == "nipype" + assert outputs.stdout[3] == "pydra" assert ( - res.outputs.return_code[0] - == res.outputs.return_code[1] - == res.outputs.return_code[2] - == res.outputs.return_code[3] + outputs.return_code[0] + == outputs.return_code[1] + == outputs.return_code[2] + == outputs.return_code[3] == 0 ) assert ( - res.outputs.stderr[0] - == res.outputs.stderr[1] - == res.outputs.stderr[2] - == res.outputs.stderr[3] + outputs.stderr[0] + == outputs.stderr[1] + == outputs.stderr[2] + == outputs.stderr[3] == "" ) @@ -228,13 +228,12 @@ def test_shell_cmd_7(plugin, tmp_path): assert shelly.inputs.executable == ["echo", ["echo", "-n"]] assert shelly.inputs.args == ["nipype", "pydra"] - res = shelly(plugin=plugin) + outputs = shelly(worker=plugin) - assert res[0][0].output.stdout == "nipype\n" - assert res[0][1].output.stdout == "pydra\n" - - assert res[1][0].output.stdout == "nipype" - assert res[1][1].output.stdout == "pydra" + assert outputs.stdout[0][0] == "nipype\n" + assert outputs.stdout[0][1] == "pydra" + assert outputs.stdout[1][0] == "nipype" + assert outputs.stdout[1][1] == "pydra" # tests with workflows @@ -289,8 +288,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "hello from pydra" + outputs = results_function(shelly, worker=plugin, 
cache_dir=tmp_path) + assert outputs.stdout == "hello from pydra" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -327,8 +326,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "HELLO from pydra" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO from pydra" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -353,8 +352,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "HELLO\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -376,8 +375,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "HELLO\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -402,8 +401,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "HELLO\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "HELLO\n" def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): @@ -448,8 +447,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -473,8 +472,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "Hello\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "Hello\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -495,8 +494,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "Hello\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "Hello\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -520,8 +519,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "Hi\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert 
outputs.stdout == "Hi\n" def test_shell_cmd_inputspec_4c_exception(plugin): @@ -592,7 +591,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): shelly = Shelly(opt_t=cmd_t) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, worker=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -651,7 +650,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, worker=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_6a_exception(plugin): @@ -714,7 +713,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): shelly.opt_l = cmd_l assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, worker=plugin, cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -738,9 +737,9 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, additional_args=args) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - out1 = res.outputs.out1.fspath + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + out1 = outputs.out1.fspath assert out1.exists() # checking if the file is created in a good place assert shelly.output_dir == out1.parent @@ -773,11 +772,11 @@ class Outputs(ShellOutputs): additional_args=args, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == res.outputs.out1_changed.fspath.parent - assert res.outputs.out1_changed.fspath.name == "newfile_tmp.txt" + assert shelly.output_dir == outputs.out1_changed.fspath.parent + assert outputs.out1_changed.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -805,9 +804,9 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, newfile="newfile_tmp.txt") - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out1.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -831,11 +830,11 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, additional_args=args) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == res.outputs.out1.fspath.parent - assert res.outputs.out1.fspath.name == "newfile_tmp.txt" + assert shelly.output_dir == outputs.out1.fspath.parent + assert outputs.out1.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -872,9 +871,9 @@ class Outputs(ShellOutputs): time="02121010", ) - res = results_function(shelly, plugin=plugin, 
cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out1.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -911,9 +910,9 @@ class Outputs(ShellOutputs): time="02121010", ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out1.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -949,12 +948,12 @@ class Outputs(ShellOutputs): file_orig=file, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.file_copy.fspath.exists() - assert res.outputs.file_copy.fspath.name == "file_copy.txt" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.outputs.file_copy.fspath.parent + assert shelly.output_dir == outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter]) @@ -987,12 +986,12 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.file_copy.fspath.exists() - assert res.outputs.file_copy.fspath.name == "file_copy.txt" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.outputs.file_copy.fspath.parent + assert shelly.output_dir == outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1027,10 +1026,10 @@ class Outputs(ShellOutputs): file_orig=file, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.file_copy.fspath.exists() - assert res.outputs.file_copy.fspath.name == "file_copy" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file_copy" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1066,11 +1065,11 @@ class Outputs(ShellOutputs): file_orig=file, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.file_copy.fspath.exists() - assert res.outputs.file_copy.fspath.name == "file" - assert res.outputs.file_copy.fspath.parent == shelly.output_dir + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "file" + assert outputs.file_copy.fspath.parent == shelly.output_dir @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1107,12 +1106,12 @@ class Outputs(ShellOutputs): file_copy="my_file_copy.txt", ) - res 
= results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.file_copy.fspath.exists() - assert res.outputs.file_copy.fspath.name == "my_file_copy.txt" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.file_copy.fspath.exists() + assert outputs.file_copy.fspath.name == "my_file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.outputs.file_copy.fspath.parent + assert shelly.output_dir == outputs.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1145,8 +1144,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) assert shelly.executable == cmd_exec - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "hello from boston" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "hello from boston" def test_shell_cmd_inputspec_10_err(tmp_path): @@ -1262,9 +1261,9 @@ class Outputs(ShellOutputs): number=2, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - fspath = res.outputs.file_copy.fspath + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + fspath = outputs.file_copy.fspath assert fspath.exists() assert fspath.name == "file_even.txt" # checking if it's created in a good place @@ -1325,12 +1324,12 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out_file.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.outputs.out_file.fspath.parent == shelly.output_dir - with open(res.outputs.out_file) as f: + assert outputs.out_file.fspath.parent == shelly.output_dir + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: @@ -1368,21 +1367,21 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out_file.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying - assert res.outputs.out_file.fspath.parent == shelly.output_dir + assert outputs.out_file.fspath.parent == shelly.output_dir - assert res.outputs.out_file.fspath.parent.joinpath( - res.outputs.out_file.fspath.name + "s" + assert outputs.out_file.fspath.parent.joinpath( + outputs.out_file.fspath.name + "s" ).exists() - with open(res.outputs.out_file) as f: + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the file is uses a soft link, but it creates and an extra copy # it might depend on the OS - linked_file_copy = res.outputs.out_file.fspath.parent.joinpath( - res.outputs.out_file.fspath.name + "s" + linked_file_copy = outputs.out_file.fspath.parent.joinpath( + outputs.out_file.fspath.name + "s" ) if linked_file_copy.exists(): with open(linked_file_copy) as f: @@ -1430,12 +1429,12 @@ class 
Outputs(ShellOutputs): orig_file=str(file), ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out_file.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out_file.fspath.exists() # the file is not copied, it is changed in place - assert res.outputs.out_file == file - with open(res.outputs.out_file) as f: + assert outputs.out_file == file + with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() @@ -1460,9 +1459,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout[0] == "HELLO\n" - assert res.outputs.stdout[1] == "hi\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout[0] == "HELLO\n" + assert outputs.stdout[1] == "hi\n" def test_shell_cmd_inputspec_typeval_1(): @@ -1521,9 +1520,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): shelly = Shelly().split(text=["HELLO", "hi"]) assert shelly.inputs.executable == cmd_exec - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout[0] == "HELLO\n" - assert res.outputs.stdout[1] == "hi\n" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout[0] == "HELLO\n" + assert outputs.stdout[1] == "hi\n" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1546,11 +1545,11 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd).split(args=args) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) for i in range(len(args)): - assert res[i].output.stdout == "" - assert res[i].output.out1.fspath.exists() - assert res[i].output.out1.fspath.parent == shelly.output_dir[i] + assert outputs.stdout[i] == "" + assert outputs.out1[i].fspath.exists() + assert outputs.out1[i].fspath.parent == shelly.output_dir[i] @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1581,9 +1580,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout[0] == "hello from pydra" - assert res.outputs.stdout[1] == "have a nice one" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout[0] == "hello from pydra" + assert outputs.stdout[1] == "have a nice one" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1623,13 +1622,13 @@ class Outputs(ShellOutputs): ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - res_l = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - for i, res in enumerate(res_l): - assert res.outputs.stdout == "" - assert res.outputs.out_file.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + for i in range(len(files)): + assert outputs.stdout[i] == "" + assert outputs.out_file[i].fspath.exists() # the file is copied, and than it is changed in place - assert res.outputs.out_file.fspath.parent == shelly.output_dir[i] - with open(res.outputs.out_file) as f: + assert 
outputs.out_file[i].fspath.parent == shelly.output_dir[i] + with open(outputs.out_file[i]) as f: assert f"hi {txt_l[i]}\n" == f.read() # the original file is unchanged with open(files[i]) as f: @@ -1969,9 +1968,9 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): ) shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.newfile.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1991,9 +1990,9 @@ class Outputs(ShellOutputs): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.newfile.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): @@ -2036,9 +2035,9 @@ class Outputs(ShellOutputs): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.newfile.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.newfile.fspath.exists() def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): @@ -2082,11 +2081,11 @@ class Outputs(ShellOutputs): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.outputs.newfile) == 2 - assert all([file.fspath.exists() for file in res.outputs.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2112,11 +2111,11 @@ class Outputs(ShellOutputs): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.outputs.newfile) == 2 - assert all([file.fspath.exists() for file in res.outputs.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) assert ( shelly.output_names == shelly._generated_output_names @@ -2148,11 +2147,11 @@ class Outputs(ShellOutputs): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.outputs.newfile) == 2 - assert all([file.fspath.exists() for file in res.outputs.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.fspath.exists() for file in outputs.newfile]) def test_shell_cmd_outputspec_5b_error(): @@ -2203,11 +2202,11 @@ def gather_output(executable, output_dir): shelly = Shelly() - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" # newfile is a list - assert len(res.outputs.newfile) 
== 2 - assert all([file.exists() for file in res.outputs.newfile]) + assert len(outputs.newfile) == 2 + assert all([file.exists() for file in outputs.newfile]) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2236,9 +2235,9 @@ class Outputs(ShellOutputs): additional_args=args, ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert res.outputs.stdout == "" - assert res.outputs.out1.fspath.exists() + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() def test_shell_cmd_outputspec_6a(): @@ -2263,9 +2262,9 @@ class Outputs(ShellOutputs): shelly = Shelly(additional_args=args) - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.out1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.out1.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2309,9 +2308,9 @@ class Outputs(ShellOutputs): files_id=new_files_id, ) - res = results_function(shelly, cache_dir=tmp_path) - assert res.outputs.stdout == "" - for file in res.outputs.new_files: + outputs = results_function(shelly, cache_dir=tmp_path) + assert outputs.stdout == "" + for file in outputs.new_files: assert file.fspath.exists() @@ -2359,9 +2358,9 @@ class Outputs(ShellOutputs): # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_inputspec_11 # see https://github.com/nipype/pydra/issues/671 - res = results_function(shelly, "serial") - assert res.outputs.stdout == "" - assert res.outputs.new_files.fspath.exists() + outputs = results_function(shelly, "serial") + assert outputs.stdout == "" + assert outputs.new_files.fspath.exists() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2404,10 +2403,10 @@ class Outputs(ShellOutputs): shelly = Shelly().split("additional_args", args=args) - results = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - for index, res in enumerate(results): - assert res.outputs.out_file_index == index + 1 - assert res.outputs.stderr_field == f"stderr: {res.outputs.stderr}" + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + for index in range(2): + assert outputs.out_file_index[index] == index + 1 + assert outputs.stderr_field[index] == f"stderr: {outputs.stderr}" def test_shell_cmd_outputspec_8b_error(): @@ -2459,7 +2458,7 @@ class Outputs(ShellOutputs): shelly = Shelly(resultsDir="outdir").split(additional_args=args) - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, worker=plugin, cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @@ -2486,11 +2485,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - - executable = cmd - class Outputs(ShellOutputs): resultsDir: Directory = shell.outarg( @@ -2504,10 +2498,10 @@ class Outputs(ShellOutputs): == shelly._generated_output_names == ["return_code", "stdout", "stderr", "resultsDir"] ) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) print("Cache_dirr:", shelly.cache_dir) assert (shelly.output_dir / Path("test")).exists() - assert 
get_lowest_directory(res.outputs.resultsDir) == get_lowest_directory( + assert get_lowest_directory(outputs.resultsDir) == get_lowest_directory( shelly.output_dir / Path("test") ) @@ -2537,10 +2531,10 @@ class Outputs(ShellOutputs): executable=cmd, ).split("args", args=args) - res = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) for i in range(len(args)): - assert res[i].output.stdout == "" - assert res[i].output.out1.fspath.exists() + assert outputs.stdout[i] == "" + assert outputs.out1[i].fspath.exists() # customised output_spec for tasks in workflows @@ -2616,11 +2610,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): file1: str = shell.arg(help="1st creadted file", argstr="", position=1) file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - - executable = cmd - class Outputs(ShellOutputs): newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") @@ -2631,11 +2620,11 @@ class Outputs(ShellOutputs): ) shelly.file1 = "new_file_1.txt" - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() # newfile2 is not created, since file2 is not provided - assert res.outputs.newfile2 is attr.NOTHING + assert outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_2(): @@ -2673,10 +2662,10 @@ class Outputs(ShellOutputs): == ["return_code", "stdout", "stderr", "newfile1", "newfile2"] ) - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() - assert res.outputs.newfile2.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_2a(): @@ -2723,10 +2712,10 @@ class Outputs(ShellOutputs): "newfile1", ] - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() - assert res.outputs.newfile2 is attr.NOTHING + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_3(): @@ -2759,10 +2748,10 @@ class Outputs(ShellOutputs): shelly.file2 = "new_file_2.txt" shelly.additional_inp = 2 - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() - assert res.outputs.newfile2.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() + assert outputs.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_3a(): @@ -2809,11 +2798,11 @@ class Outputs(ShellOutputs): "newfile1", ] - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() # additional input not provided so no newfile2 set (even if the file was created) - assert res.outputs.newfile2 is attr.NOTHING + assert outputs.newfile2 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_4(): @@ -2849,9 +2838,9 @@ class Outputs(ShellOutputs): == ["return_code", "stdout", "stderr", "newfile1"] ) - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() def 
test_shell_cmd_inputspec_outputspec_4a(): @@ -2883,9 +2872,9 @@ class Outputs(ShellOutputs): # the value is not in the list from requires shelly.additional_inp = 1 - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1 is attr.NOTHING + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_5(): @@ -2921,9 +2910,9 @@ class Outputs(ShellOutputs): shelly.file1 = "new_file_1.txt" shelly.additional_inp_A = 2 - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5a(): @@ -2959,9 +2948,9 @@ class Outputs(ShellOutputs): shelly.file1 = "new_file_1.txt" shelly.additional_inp_B = 2 - res = shelly() - assert res.outputs.stdout == "" - assert res.outputs.newfile1.fspath.exists() + outputs = shelly() + assert outputs.stdout == "" + assert outputs.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5b(): @@ -2996,10 +2985,10 @@ class Outputs(ShellOutputs): ) shelly.file1 = "new_file_1.txt" - res = shelly() - assert res.outputs.stdout == "" + outputs = shelly() + assert outputs.stdout == "" # neither additional_inp_A nor additional_inp_B is set, so newfile1 is NOTHING - assert res.outputs.newfile1 is attr.NOTHING + assert outputs.newfile1 is attr.NOTHING def test_shell_cmd_inputspec_outputspec_6_except(): @@ -3163,7 +3152,7 @@ class Outputs(ShellOutputs): out_file = shelly.output_dir / "test_brain.nii.gz" assert shelly.executable == "bet" assert shelly.cmdline == f"bet {in_file} {out_file}" - # res = shelly(plugin="cf") + # outputs = shelly(plugin="cf") def test_shell_cmd_optional_output_file1(tmp_path): @@ -3329,12 +3318,12 @@ class Outputs(ShellOutputs): out_name="test", ) shelly() - res = shelly.result() + outputs = shelly.result() # the first output file is created - assert res.outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") - assert res.outputs.out_1.fspath.exists() + assert outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert outputs.out_1.fspath.exists() # the second output file is not created - assert res.outputs.out_2 == attr.NOTHING + assert outputs.out_2 == attr.NOTHING def test_shell_cmd_non_existing_outputs_4(tmp_path): @@ -3401,10 +3390,10 @@ class Outputs(ShellOutputs): out_name=["test_1.nii", "test_2.nii"], ) shelly() - res = shelly.result() + outputs = shelly.result() # checking if the outputs are Nothing - assert res.outputs.out_list[0] == attr.NOTHING - assert res.outputs.out_list[1] == attr.NOTHING + assert outputs.out_list[0] == attr.NOTHING + assert outputs.out_list[1] == attr.NOTHING def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): @@ -3434,10 +3423,10 @@ class Outputs(ShellOutputs): out_name=["test_1", "test_2"], ) shelly() - res = shelly.result() + outputs = shelly.result() # checking if the outputs are Nothing - assert res.outputs.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") - assert res.outputs.out_list[1] == attr.NOTHING + assert outputs.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") + assert outputs.out_list[1] == attr.NOTHING @pytest.mark.xfail( From b60a2fb54a42ca2cb660c291cacbe850c997aced Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 16:08:46 +1100 Subject: [PATCH 298/342] fixed error message in missing base class --- pydra/design/base.py | 8 ++++---- 1 file changed, 4 
insertions(+), 4 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index bedbb26dec..0b237af33d 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -362,8 +362,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: if not issubclass(klass, spec_type): raise ValueError( - f"The canonical form of {spec_type.__module__.split('.')[-1]} task definitions, " - f"{klass}, must inherit from {spec_type}" + f"When using the canonical form for {spec_type.__module__.split('.')[-1]} " + f"tasks, {klass} must inherit from {spec_type}" ) inputs = get_fields(klass, arg_type, auto_attribs, input_helps) @@ -376,8 +376,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: ) from None if not issubclass(outputs_klass, outputs_type): raise ValueError( - f"The canonical form of {spec_type.__module__.split('.')[-1]} task definitions, " - f"{klass}, must inherit from {spec_type}" + f"When using the canonical form for {outputs_type.__module__.split('.')[-1]} " + f"task outputs {outputs_klass}, you must inherit from {outputs_type}" ) output_helps, _ = parse_doc_string(outputs_klass.__doc__) From 76c769b2475cb096cafcae0377054e707f7daf3f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 16:09:21 +1100 Subject: [PATCH 299/342] debugging test_shelltask* --- pydra/engine/specs.py | 4 +- pydra/engine/tests/test_shelltask.py | 602 +++++++++--------- .../engine/tests/test_shelltask_inputspec.py | 6 +- pydra/engine/tests/utils.py | 5 + 4 files changed, 311 insertions(+), 306 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index f352e134e4..3f65e752e2 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -338,7 +338,9 @@ def split( ): split_val = StateArray(value) else: - raise TypeError(f"Could not split {value} as it is not a sequence type") + raise TypeError( + f"Could not split {value!r} as it is not a sequence type" + ) split_inputs[name] = split_val split_def = attrs.evolve(self, **split_inputs) split_def._splitter = splitter diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 19663ba42a..3e362a9a70 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -6,9 +6,9 @@ from pathlib import Path import re import stat -from ..submitter import Submitter +from pydra.engine.submitter import Submitter from pydra.design import shell, workflow -from ..specs import ( +from pydra.engine.specs import ( ShellOutputs, ShellDef, ) @@ -19,8 +19,9 @@ from pydra.utils.typing import ( MultiOutputFile, MultiInputObj, + StateArray, ) -from .utils import run_no_submitter, run_submitter, no_win +from .utils import run_no_submitter, run_submitter, no_win, get_output_names if sys.platform.startswith("win"): pytest.skip("SLURM not available in windows", allow_module_level=True) @@ -34,7 +35,7 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): shelly = shell.define(cmd)() assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, worker=plugin_dask_opt, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin_dask_opt, cache_dir=tmp_path) assert Path(outputs.stdout.rstrip()).parent == tmp_path assert outputs.return_code == 0 assert outputs.stderr == "" @@ -50,8 +51,8 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) - assert Path(outputs.stdout) == 
Path(shelly.output_dir) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + assert Path(outputs.stdout).parent == tmp_path assert outputs.return_code == 0 assert outputs.stderr == "" @@ -64,7 +65,7 @@ def test_shell_cmd_2(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd[1:]) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -81,7 +82,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd_args) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -91,14 +92,14 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): def test_shell_cmd_2b(plugin, results_function, tmp_path): """a command with arguments, using strings executable and args""" cmd_exec = "echo" - cmd_args = "pydra" + cmd_args = ["pydra"] # separate command into exec + args - shelly = shell.define(cmd_exec)(args=cmd_args) + shelly = shell.define(cmd_exec)(additional_args=cmd_args) assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "pydra\n" assert outputs.return_code == 0 assert outputs.stderr == "" @@ -115,11 +116,11 @@ def test_shell_cmd_3(plugin_dask_opt, tmp_path): cmd = ["pwd", "whoami"] # all args given as executable - shelly = shell.define("placeholder")().split("executable", executable=cmd) + shelly = shell.define("shelly")().split(executable=cmd) # assert shelly.cmdline == ["pwd", "whoami"] - outputs = shelly(worker=plugin_dask_opt) - assert Path(outputs.stdout[0].rstrip()) == shelly.output_dir[0] + outputs = shelly(plugin=plugin_dask_opt, cache_dir=tmp_path) + assert Path(outputs.stdout[0].rstrip()).parent == tmp_path if "USER" in os.environ: assert outputs.stdout[1] == f"{os.environ['USER']}\n" @@ -134,14 +135,14 @@ def test_shell_cmd_4(plugin, tmp_path): splitter=args """ cmd_exec = "echo" - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args - shelly = shell.define(cmd_exec)().split(splitter="args", args=cmd_args) + shelly = shell.define(cmd_exec)().split(additional_args=cmd_args) - assert shelly.inputs.executable == "echo" - assert shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == "echo" + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(worker=plugin) + outputs = shelly(plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -155,16 +156,18 @@ def test_shell_cmd_5(plugin, tmp_path): using splitter and combiner for args """ cmd_exec = "echo" - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args shelly = ( - shell.define(cmd_exec)().split(splitter="args", args=cmd_args).combine("args") + shell.define(cmd_exec)() + .split(additional_args=cmd_args) + .combine("additional_args") ) - assert shelly.inputs.executable == "echo" - assert 
shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == "echo" + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(worker=plugin) + outputs = shelly(plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -175,21 +178,21 @@ def test_shell_cmd_6(plugin, tmp_path): outer splitter for executable and args """ cmd_exec = ["echo", ["echo", "-n"]] - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args - shelly = shell.define("placeholder").split( + shelly = shell.define("shelly")().split( executable=cmd_exec, additional_args=cmd_args ) - assert shelly.inputs.executable == ["echo", ["echo", "-n"]] - assert shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == ["echo", ["echo", "-n"]] + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == [ # "echo nipype", # "echo pydra", # "echo -n nipype", # "echo -n pydra", # ] - outputs = shelly(worker=plugin) + outputs = shelly(plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -217,18 +220,18 @@ def test_shell_cmd_7(plugin, tmp_path): outer splitter for executable and args, and combiner=args """ cmd_exec = ["echo", ["echo", "-n"]] - cmd_args = ["nipype", "pydra"] + cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args shelly = ( - shell.define("placeholder") + shell.define("shelly")() .split(executable=cmd_exec, additional_args=cmd_args) - .combine("args") + .combine("additional_args") ) - assert shelly.inputs.executable == ["echo", ["echo", "-n"]] - assert shelly.inputs.args == ["nipype", "pydra"] + assert shelly.executable == ["echo", ["echo", "-n"]] + assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - outputs = shelly(worker=plugin) + outputs = shelly(plugin=plugin) assert outputs.stdout[0][0] == "nipype\n" assert outputs.stdout[0][1] == "pydra" @@ -250,7 +253,7 @@ def Workflow(cmd1, cmd2): wf = Workflow(cmd1="pwd", cmd2="ls") - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert "_result.pklz" in res.outputs.out @@ -268,7 +271,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): """ cmd_exec = "echo" cmd_opt = True - cmd_args = "hello from pydra" + cmd_args = ["hello from pydra"] @shell.define class Shelly(ShellDef["Shelly.Outputs"]): @@ -279,16 +282,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): help="option", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args - shelly = Shelly( - additional_args=cmd_args, - opt_n=cmd_opt, - ) + shelly = Shelly(additional_args=cmd_args, opt_n=cmd_opt) assert shelly.executable == cmd_exec - assert shelly.args == cmd_args + assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "hello from pydra" @@ -317,16 +320,15 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="-n", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args - shelly = Shelly( - additional_args=cmd_args, - opt_n=cmd_opt, - opt_hello=cmd_opt_hello, - ) + shelly = Shelly(additional_args=cmd_args, opt_n=cmd_opt, opt_hello=cmd_opt_hello) assert 
shelly.executable == cmd_exec - assert shelly.args == cmd_args + assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO from pydra" @@ -342,17 +344,17 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: str = shell.arg( position=1, help="text", - mandatory=True, argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args - shelly = Shelly( - text=hello, - ) + shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -367,15 +369,16 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd_exec - text: str = shell.arg(position=1, help="text", mandatory=True, argstr="") + text: str = shell.arg(position=1, help="text", argstr="") + + class Outputs(ShellOutputs): + pass # separate command into exec + args - shelly = Shelly( - text=hello, - ) + shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -391,17 +394,19 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: str = shell.arg( position=1, help="text", - mandatory=True, argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(executable=cmd_exec) shelly.text = hello assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -415,10 +420,12 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: str = shell.arg( position=1, help="text", - mandatory=True, argstr="", ) + class Outputs(ShellOutputs): + pass + shelly = Shelly(executable=cmd_exec) with pytest.raises(Exception) as excinfo: @@ -438,16 +445,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): default=None, position=1, help="text", - mandatory=False, argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(executable=cmd_exec) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "\n" @@ -466,13 +475,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(executable=cmd_exec) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -488,13 +500,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd_exec text: str = shell.arg(default="Hello", position=1, help="text", argstr="") + class Outputs(ShellOutputs): + pass + # separate command into exec + args 
shelly = Shelly(executable=cmd_exec) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -513,13 +528,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(executable=cmd_exec) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hi\n" @@ -539,10 +557,12 @@ class Shelly(ShellDef["Shelly.Outputs"]): default="Hello", position=1, help="text", - mandatory=True, argstr="", ) + class Outputs(ShellOutputs): + pass + def test_shell_cmd_inputspec_4d_exception(plugin): """mandatory field added to fields, value provided""" @@ -564,6 +584,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): @@ -587,11 +610,14 @@ class Shelly(ShellDef["Shelly.Outputs"]): xor=["opt_t"], ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(opt_t=cmd_t) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, worker=plugin, cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -616,6 +642,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): xor=["opt_t"], ) + class Outputs(ShellOutputs): + pass + shelly = Shelly(opt_t=cmd_t, opt_S=cmd_S) with pytest.raises(Exception) as excinfo: shelly() @@ -646,11 +675,14 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="-l", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, worker=plugin, cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_6a_exception(plugin): @@ -675,6 +707,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="-l", ) + class Outputs(ShellOutputs): + pass + shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) with pytest.raises(Exception) as excinfo: shelly() @@ -705,6 +740,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="-l", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly( opt_t=cmd_t @@ -713,7 +751,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): shelly.opt_l = cmd_l assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, worker=plugin, cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -723,7 +761,7 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): using name_tamplate in metadata """ cmd = "touch" - args = "newfile_tmp.txt" + args = ["newfile_tmp.txt"] @shell.define class Shelly(ShellDef["Shelly.Outputs"]): @@ -737,12 +775,12 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, additional_args=args) - outputs = results_function(shelly, worker=plugin, 
cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" out1 = outputs.out1.fspath assert out1.exists() # checking if the file is created in a good place - assert shelly.output_dir == out1.parent + assert out1.parent.parent == tmp_path assert out1.name == "newfile_tmp.txt" @@ -754,7 +792,7 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): and changing the output name for output_spec using output_field_name """ cmd = "touch" - args = "newfile_tmp.txt" + args = File.mock("newfile_tmp.txt") @shell.define class Shelly(ShellDef["Shelly.Outputs"]): @@ -772,10 +810,10 @@ class Outputs(ShellOutputs): additional_args=args, ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == outputs.out1_changed.fspath.parent + assert outputs.out1_changed.fspath.parent.parent == tmp_path assert outputs.out1_changed.fspath.name == "newfile_tmp.txt" @@ -797,14 +835,14 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{newfile}", help="output file", ) - shelly = Shelly(executable=cmd, newfile="newfile_tmp.txt") + shelly = Shelly(executable=cmd, newfile=File.mock("newfile_tmp.txt")) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -816,24 +854,24 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): using name_tamplate with txt extension (extension from args should be removed """ cmd = "touch" - args = "newfile_tmp.txt" + args = File.mock("newfile_tmp.txt") @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{args}.txt", help="output file", ) shelly = Shelly(executable=cmd, additional_args=args) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # checking if the file is created in a good place - assert shelly.output_dir == outputs.out1.fspath.parent + assert outputs.out1.fspath.parent.parent == tmp_path assert outputs.out1.fspath.name == "newfile_tmp.txt" @@ -860,18 +898,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{newfile}", help="output file", ) shelly = Shelly( executable=cmd, - newfile="newfile_tmp.txt", + newfile=File.mock("newfile_tmp.txt"), time="02121010", ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -899,18 +937,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{newfile}", help="output file", ) shelly = Shelly( executable=cmd, - newfile="newfile_tmp.txt", + newfile=File.mock("newfile_tmp.txt"), time="02121010", ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, 
cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -937,7 +975,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - file_copy: str = shell.outarg( + file_copy: File = shell.outarg( path_template="{file_orig}_copy", help="output file", argstr="", @@ -948,12 +986,12 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == outputs.file_copy.fspath.parent + assert outputs.file_copy.fspath.parent.parent == tmp_path @pytest.mark.parametrize("results_function", [run_no_submitter]) @@ -978,7 +1016,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - file_copy: str = shell.outarg( + file_copy: File = shell.outarg( path_template="{file_orig}_copy", help="output file", argstr="", @@ -986,12 +1024,12 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == outputs.file_copy.fspath.parent + assert outputs.file_copy.fspath.parent.parent == tmp_path @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1026,7 +1064,7 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy" @@ -1060,16 +1098,13 @@ class Outputs(ShellOutputs): argstr="", ) - shelly = Shelly( - executable=cmd, - file_orig=file, - ) + shelly = Shelly(executable=cmd, file_orig=file) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file" - assert outputs.file_copy.fspath.parent == shelly.output_dir + assert outputs.file_copy.fspath.parent.parent == tmp_path @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1106,12 +1141,12 @@ class Outputs(ShellOutputs): file_copy="my_file_copy.txt", ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "my_file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == outputs.file_copy.fspath.parent + assert outputs.file_copy.fspath.parent.parent == tmp_path @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1136,7 +1171,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="...", sep=" ", help="list of files", - mandatory=True, ) shelly = Shelly( @@ -1144,7 +1178,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) assert shelly.executable == cmd_exec - outputs = 
results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "hello from boston" @@ -1171,7 +1205,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): position=1, argstr="", help="a file", - mandatory=True, ) with pytest.raises(FileNotFoundError): @@ -1245,11 +1278,10 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) number: int = shell.arg( help="a number", - mandatory=True, ) class Outputs(ShellOutputs): - file_copy: str = shell.outarg( + file_copy: File = shell.outarg( path_template=template_function, help="output file", argstr="", @@ -1261,13 +1293,13 @@ class Outputs(ShellOutputs): number=2, ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" fspath = outputs.file_copy.fspath assert fspath.exists() assert fspath.name == "file_even.txt" # checking if it's created in a good place - assert shelly.output_dir == fspath.parent + assert fspath.parent.parent == tmp_path def test_shell_cmd_inputspec_with_iterable(): @@ -1312,23 +1344,22 @@ class Shelly(ShellDef["Shelly.Outputs"]): position=1, argstr="", help="orig file", - mandatory=True, copyfile=True, ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( path_template="{orig_file}", help="output file", ) shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert outputs.out_file.fspath.parent == shelly.output_dir + assert outputs.out_file.fspath.parent.parent == tmp_path with open(outputs.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged @@ -1355,23 +1386,22 @@ class Shelly(ShellDef["Shelly.Outputs"]): position=1, argstr="", help="orig file", - mandatory=True, - copyfile="hardlink", + copy_mode="hardlink", ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( path_template="{orig_file}", help="output file", ) shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying - assert outputs.out_file.fspath.parent == shelly.output_dir + assert outputs.out_file.fspath.parent.parent == tmp_path assert outputs.out_file.fspath.parent.joinpath( outputs.out_file.fspath.name + "s" @@ -1415,11 +1445,10 @@ class Shelly(ShellDef["Shelly.Outputs"]): position=1, argstr="", help="orig file", - mandatory=True, ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( path_template="{orig_file}", help="output file", ) @@ -1429,7 +1458,7 @@ class Outputs(ShellOutputs): orig_file=str(file), ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is not copied, it is changed in place @@ -1450,16 +1479,15 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: str = shell.arg( position=1, help="text", 
- mandatory=True, argstr="", ) # separate command into exec + args shelly = Shelly().split("text", text=hello) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1512,15 +1540,14 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: str = shell.arg( position=1, help="text", - mandatory=True, argstr="", ) # separate command into exec + args shelly = Shelly().split(text=["HELLO", "hi"]) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1531,25 +1558,25 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): adding splitter to input that is used in the output_file_tamplate """ cmd = "touch" - args = ["newfile_1.txt", "newfile_2.txt"] + args = [File.mock("newfile_1.txt"), File.mock("newfile_2.txt")] @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{args}", help="output file", ) shelly = Shelly(executable=cmd).split(args=args) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() - assert outputs.out1[i].fspath.parent == shelly.output_dir[i] + assert outputs.out1[i].fspath.parent.parent == tmp_path[i] @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1571,16 +1598,15 @@ class Shelly(ShellDef["Shelly.Outputs"]): file: File = shell.arg( position=1, help="files", - mandatory=True, argstr="", ) shelly = Shelly().split(file=[file_1, file_2]) - assert shelly.inputs.executable == cmd_exec + assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout[0] == "hello from pydra" assert outputs.stdout[1] == "have a nice one" @@ -1607,12 +1633,11 @@ class Shelly(ShellDef["Shelly.Outputs"]): position=1, argstr="", help="orig file", - mandatory=True, - copyfile="copy", + copy_mode="copy", ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( path_template="{orig_file}", help="output file", ) @@ -1622,12 +1647,12 @@ class Outputs(ShellOutputs): ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(files)): assert outputs.stdout[i] == "" assert outputs.out_file[i].fspath.exists() # the file is copied, and than it is changed in place - assert outputs.out_file[i].fspath.parent == shelly.output_dir[i] + assert outputs.out_file[i].fspath.parent.parent == tmp_path[i] with open(outputs.out_file[i]) as f: assert f"hi {txt_l[i]}\n" == f.read() # the 
original file is unchanged @@ -1649,7 +1674,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = "touch" class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{args}", help="output file", ) @@ -1666,14 +1691,14 @@ def Workflow(cmd, args): return shelly.out1, shelly.stdout - wf = Workflow(cmd="touch", args="newfile.txt") + wf = Workflow(cmd="touch", args=File.mock("newfile.txt")) - with Submitter(worker=plugin_dask_opt) as sub: + with Submitter(plugin=plugin_dask_opt) as sub: res = sub(wf) assert res.outputs.out == "" assert res.outputs.out_f.fspath.exists() - assert res.outputs.out_f.fspath.parent == wf.output_dir + assert res.outputs.out_f.fspath.parent.parent == tmp_path def test_wf_shell_cmd_2a(plugin, tmp_path): @@ -1683,10 +1708,10 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - executable = "placeholder" + executable = "shelly" class Outputs(ShellOutputs): - out1: str = shell.outarg( + out1: File = shell.outarg( path_template="{args}", help="output file", ) @@ -1703,9 +1728,9 @@ def Workflow(cmd, args): return shelly.out1, shelly.stdout - wf = Workflow(cmd="touch", args=("newfile.txt",)) + wf = Workflow(cmd="touch", args=(File.mock("newfile.txt"),)) - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out == "" @@ -1720,8 +1745,10 @@ def test_wf_shell_cmd_3(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + class Outputs(ShellOutputs): - file: str = shell.outarg( + file: File = shell.outarg( path_template="{args}", help="output file", ) @@ -1735,7 +1762,7 @@ class Shelly2(ShellDef["Shelly2.Outputs"]): ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( position=2, argstr="", path_template="{orig_file}_copy", @@ -1760,17 +1787,17 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp", args="newfile.txt") + wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out1 == "" assert res.outputs.touch_file.fspath.exists() - assert res.outputs.touch_file.fspath.parent == wf.output_dir + assert res.outputs.touch_file.fspath.parent.parent == tmp_path assert res.outputs.out2 == "" assert res.outputs.cp_file.fspath.exists() - assert res.outputs.cp_file.fspath.parent == wf.output_dir + assert res.outputs.cp_file.fspath.parent.parent == tmp_path def test_wf_shell_cmd_3a(plugin, tmp_path): @@ -1821,9 +1848,9 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp", args="newfile.txt") + wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1841,7 +1868,7 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): class Outputs(ShellOutputs): - file: str = shell.outarg( + file: File = shell.outarg( path_template="{args}", help="output file", ) @@ -1855,7 +1882,7 @@ class Shelly2(ShellDef["Shelly2.Outputs"]): ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( position=2, argstr="", 
path_template="{orig_file}_copy", @@ -1881,19 +1908,19 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout wf = Workflow(cmd1="touch", cmd2="cp").split( - args=["newfile_1.txt", "newfile_2.txt"] + args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")] ) - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) for i in range(2): assert res.outputs.out1[i] == "" assert res.outputs.touch_file[i].fspath.exists() - assert res.outputs.touch_file[i].fspath.parent == wf.output_dir[i] + assert res.outputs.touch_file[i].fspath.parent.parent == tmp_path[i] assert res.outputs.out2[i] == "" assert res.outputs.cp_file[i].fspath.exists() - assert res.outputs.cp_file[i].fspath.parent == wf.output_dir[i] + assert res.outputs.cp_file[i].fspath.parent.parent == tmp_path[i] def test_wf_shell_cmd_ndst_1(plugin, tmp_path): @@ -1905,7 +1932,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): class Outputs(ShellOutputs): - file: str = shell.outarg( + file: File = shell.outarg( path_template="{args}", help="output file", ) @@ -1919,7 +1946,7 @@ class Shelly2(ShellDef["Shelly2.Outputs"]): ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( position=2, argstr="", path_template="{orig_file}_copy", @@ -1943,9 +1970,13 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp", args=["newfile_1.txt", "newfile_2.txt"]) + wf = Workflow( + cmd1="touch", + cmd2="cp", + args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")], + ) - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out1 == ["", ""] @@ -1962,13 +1993,16 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ - cmd = ["touch", "newfile_tmp.txt"] + cmd = ["touch", File.mock("newfile_tmp.txt")] Shelly = shell.define( - cmd, outputs=[shell.arg(name="newfile", type=File, default="newfile_tmp.txt")] + cmd, + outputs=[ + shell.outarg(name="newfile", type=File, path_template="newfile_tmp.txt") + ], ) shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -1986,11 +2020,11 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = shell.outarg(default="newfile_tmp.txt") + newfile: File = shell.outarg(path_template="newfile_tmp.txt") shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -2007,12 +2041,12 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = "newfile_tmp_.txt" + newfile: File = shell.outarg(path_template="newfile_tmp_.txt") shelly = Shelly() with pytest.raises(Exception) as exinfo: - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: shelly(submitter=sub) assert "does not exist" in str(exinfo.value) @@ -2031,11 +2065,11 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class 
Outputs(ShellOutputs): - newfile: File = "newfile_*.txt" + newfile: File = shell.outarg(path_template="newfile_*.txt") shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -2058,7 +2092,7 @@ class Outputs(ShellOutputs): shelly = Shelly() with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: + with Submitter(plugin=plugin) as sub: shelly(submitter=sub) assert "no file matches" in str(excinfo.value) @@ -2081,7 +2115,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2111,15 +2145,15 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 assert all([file.fspath.exists() for file in outputs.newfile]) assert ( - shelly.output_names - == shelly._generated_output_names - == ["return_code", "stdout", "stderr", "newfile"] + get_output_names(shelly) + == shelly._generated_output_names(outputs.stdout, outputs.stderr) + == ["newfile", "return_code", "stderr", "stdout"] ) @@ -2147,7 +2181,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2202,7 +2236,7 @@ def gather_output(executable, output_dir): shelly = Shelly() - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2235,7 +2269,7 @@ class Outputs(ShellOutputs): additional_args=args, ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -2284,7 +2318,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd script: File = shell.arg( help="script file", - mandatory=True, position=1, argstr="", ) @@ -2293,7 +2326,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="...", sep=" ", help="list of name indices", - mandatory=True, ) class Outputs(ShellOutputs): @@ -2331,7 +2363,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd script: File = shell.arg( help="script file", - mandatory=True, position=1, argstr="", ) @@ -2340,7 +2371,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="...", sep=" ", help="list of name indices", - mandatory=True, ) class Outputs(ShellOutputs): @@ -2403,7 +2433,7 @@ class Outputs(ShellOutputs): shelly = Shelly().split("additional_args", args=args) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index in range(2): assert outputs.out_file_index[index] == index + 1 assert outputs.stderr_field[index] == f"stderr: {outputs.stderr}" @@ -2458,7 +2488,7 @@ class Outputs(ShellOutputs): shelly = 
Shelly(resultsDir="outdir").split(additional_args=args) - results_function(shelly, worker=plugin, cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @@ -2494,15 +2524,16 @@ class Outputs(ShellOutputs): shelly = Shelly(resultsDir="test") assert ( - shelly.output_names + get_output_names(shelly) == shelly._generated_output_names - == ["return_code", "stdout", "stderr", "resultsDir"] + == ["resultsDir", "return_code", "stderr", "stdout"] ) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) print("Cache_dirr:", shelly.cache_dir) - assert (shelly.output_dir / Path("test")).exists() + output_dir = next(tmp_path.iterdir()) + assert (output_dir / Path("test")).exists() assert get_lowest_directory(outputs.resultsDir) == get_lowest_directory( - shelly.output_dir / Path("test") + output_dir / Path("test") ) @@ -2531,7 +2562,7 @@ class Outputs(ShellOutputs): executable=cmd, ).split("args", args=args) - outputs = results_function(shelly, worker=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() @@ -2554,7 +2585,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = shell.outarg(default="newfile_tmp.txt") + newfile: File = shell.outarg(path_template="newfile_tmp.txt") @workflow.define(outputs=["stdout", "newfile"]) def Workflow(cmd): @@ -2563,13 +2594,13 @@ def Workflow(cmd): wf = Workflow() - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.stdout == "" assert res.outputs.newfile.fspath.exists() # checking if the file was copied to the wf dir - assert res.outputs.newfile.fspath.parent == wf.output_dir + assert res.outputs.newfile.fspath.parent.parent == tmp_path def test_shell_cmd_inputspec_outputspec_1(): @@ -2589,7 +2620,9 @@ class Outputs(ShellOutputs): executable = cmd - shelly = Shelly(file1="new_file_1.txt", file2="new_file_2.txt") + shelly = Shelly( + file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") + ) outputs = shelly() assert outputs.stdout == "" @@ -2618,7 +2651,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") outputs = shelly() assert outputs.stdout == "" @@ -2652,14 +2685,14 @@ class Outputs(ShellOutputs): requires=["file1", "file2"], ) - shelly = Shelly() - shelly.file1 = "new_file_1.txt" - shelly.file2 = "new_file_2.txt" + shelly = Shelly( + file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") + ) # all fields from output_spec should be in output_names and _generated_output_names assert ( - shelly.output_names + get_output_names(shelly) == shelly._generated_output_names - == ["return_code", "stdout", "stderr", "newfile1", "newfile2"] + == ["newfile1", "newfile2", "return_code", "stderr", "stdout"] ) outputs = shelly() @@ -2696,23 +2729,24 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") # _generated_output_names should know that newfile2 will not be generated - assert shelly.output_names == [ - 
"return_code", - "stdout", - "stderr", + assert get_output_names(shelly) == [ "newfile1", "newfile2", - ] - assert shelly._generated_output_names == [ "return_code", - "stdout", "stderr", - "newfile1", + "stdout", ] outputs = shelly() + assert shelly._generated_output_names(outputs.stdout, outputs.stderr) == [ + "newfile1", + "return_code", + "stderr", + "stdout", + ] + assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() assert outputs.newfile2 is attr.NOTHING @@ -2744,8 +2778,8 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" - shelly.file2 = "new_file_2.txt" + shelly.file1 = File.mock("new_file_1.txt") + shelly.file2 = File.mock("new_file_2.txt") shelly.additional_inp = 2 outputs = shelly() @@ -2781,21 +2815,21 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" - shelly.file2 = "new_file_2.txt" + shelly.file1 = File.mock("new_file_1.txt") + shelly.file2 = File.mock("new_file_2.txt") # _generated_output_names should know that newfile2 will not be generated - assert shelly.output_names == [ - "return_code", - "stdout", - "stderr", + assert get_output_names(shelly) == [ "newfile1", "newfile2", + "return_code", + "stderr", + "stdout", ] assert shelly._generated_output_names == [ + "newfile1", "return_code", - "stdout", "stderr", - "newfile1", + "stdout", ] outputs = shelly() @@ -2829,16 +2863,17 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp = 2 # _generated_output_names should be the same as output_names + + outputs = shelly() assert ( - shelly.output_names - == shelly._generated_output_names - == ["return_code", "stdout", "stderr", "newfile1"] + get_output_names(shelly) + == shelly._generated_output_names(outputs.stdout, outputs.stderr) + == ["newfile1", "return_code", "stderr", "stdout"] ) - outputs = shelly() assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() @@ -2868,7 +2903,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") # the value is not in the list from requires shelly.additional_inp = 1 @@ -2907,7 +2942,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp_A = 2 outputs = shelly() @@ -2945,7 +2980,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp_B = 2 outputs = shelly() @@ -2983,7 +3018,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") outputs = shelly() assert outputs.stdout == "" @@ -3015,7 +3050,7 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = "new_file_1.txt" + shelly.file1 = File.mock("new_file_1.txt") with pytest.raises(Exception, match="requires field can be"): shelly() @@ -3050,7 +3085,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): in_file: File = shell.arg( help="input file to skull strip", position=1, - mandatory=True, argstr="", ) @@ -3130,7 +3164,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_file: str = shell.outarg( + out_file: File = shell.outarg( help="name of output skull stripped image", position=2, argstr="", @@ 
-3149,7 +3183,7 @@ class Outputs(ShellOutputs): # separate command into exec + args shelly = Shelly(in_file=in_file) - out_file = shelly.output_dir / "test_brain.nii.gz" + out_file = next(tmp_path.iterdir()) / "test_brain.nii.gz" assert shelly.executable == "bet" assert shelly.cmdline == f"bet {in_file} {out_file}" # outputs = shelly(plugin="cf") @@ -3222,12 +3256,11 @@ def test_shell_cmd_non_existing_outputs_1(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - + executable = "echo" out_name: str = shell.arg( help=""" base name of the pretend outputs. """, - mandatory=True, ) class Outputs(ShellOutputs): @@ -3241,7 +3274,6 @@ class Outputs(ShellOutputs): ) shelly = Shelly( - executable="echo", out_name="test", ) outputs = shelly() @@ -3255,12 +3287,11 @@ def test_shell_cmd_non_existing_outputs_2(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - + executable = "touch" out_name: str = shell.arg( help=""" base name of the pretend outputs. """, - mandatory=True, argstr="{out_name}_1.nii", ) @@ -3269,21 +3300,18 @@ class Outputs(ShellOutputs): help="fictional output #1", path_template="{out_name}_1.nii", ) - out_2: File = shell.outarg( + out_2: File | None = shell.outarg( help="fictional output #2", path_template="{out_name}_2.nii", ) - shelly = Shelly( - executable="touch", - out_name="test", - ) + shelly = Shelly(out_name="test") outputs = shelly() # the first output file is created - assert outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" assert outputs.out_1.fspath.exists() # the second output file is not created - assert outputs.out_2 == attr.NOTHING + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_3(tmp_path): @@ -3293,12 +3321,11 @@ def test_shell_cmd_non_existing_outputs_3(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - + executable = "touch" out_name: str = shell.arg( help=""" base name of the pretend outputs. """, - mandatory=True, argstr="{out_name}_1.nii", ) @@ -3306,21 +3333,17 @@ class Outputs(ShellOutputs): out_1: File = shell.outarg( help="fictional output #1", path_template="{out_name}_1.nii", - mandatory=True, ) out_2: File = shell.outarg( help="fictional output #2", path_template="{out_name}_2.nii", ) - shelly = Shelly( - executable="touch", - out_name="test", - ) - shelly() - outputs = shelly.result() + shelly = Shelly(out_name="test") + + outputs = shelly() # the first output file is created - assert outputs.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" assert outputs.out_1.fspath.exists() # the second output file is not created assert outputs.out_2 == attr.NOTHING @@ -3333,12 +3356,11 @@ def test_shell_cmd_non_existing_outputs_4(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - + executable = "touch" out_name: str = shell.arg( help=""" base name of the pretend outputs. 
""", - mandatory=True, argstr="{out_name}_1.nii", ) @@ -3346,16 +3368,13 @@ class Outputs(ShellOutputs): out_1: File = shell.outarg( help="fictional output #1", path_template="{out_name}_1.nii", - mandatory=True, ) out_2: File = shell.outarg( help="fictional output #2", path_template="{out_name}_2.nii", - mandatory=True, ) shelly = Shelly( - executable="touch", out_name="test", ) # An exception should be raised because the second mandatory output does not exist @@ -3363,7 +3382,7 @@ class Outputs(ShellOutputs): shelly() assert "mandatory output for variable out_2 does not exist" == str(excinfo.value) # checking if the first output was created - assert (Path(shelly.output_dir) / Path("test_1.nii")).exists() + assert (next(tmp_path.iterdir()) / "test_1.nii").exists() def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): @@ -3376,7 +3395,6 @@ class Shelly(ShellDef["Shelly.Outputs"]): help=""" base name of the pretend outputs. """, - mandatory=True, argstr="...", ) @@ -3386,11 +3404,9 @@ class Outputs(ShellOutputs): path_template="{out_name}", ) - shelly = Shelly( - out_name=["test_1.nii", "test_2.nii"], - ) - shelly() - outputs = shelly.result() + shelly = Shelly(out_name=["test_1.nii", "test_2.nii"]) + + outputs = shelly() # checking if the outputs are Nothing assert outputs.out_list[0] == attr.NOTHING assert outputs.out_list[1] == attr.NOTHING @@ -3402,13 +3418,12 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - + executable = "touch" out_name: MultiInputObj = shell.arg( help=""" base name of the pretend outputs. """, sep=" test_1_real.nii", # hacky way of creating an extra file with that name - mandatory=True, argstr="...", ) @@ -3418,14 +3433,11 @@ class Outputs(ShellOutputs): path_template="{out_name}_real.nii", ) - shelly = Shelly( - executable="touch", - out_name=["test_1", "test_2"], - ) - shelly() - outputs = shelly.result() + shelly = Shelly(out_name=["test_1", "test_2"]) + + outputs = shelly() # checking if the outputs are Nothing - assert outputs.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") + assert outputs.out_list[0] == File(next(tmp_path.iterdir()) / "test_1_real.nii") assert outputs.out_list[1] == attr.NOTHING @@ -3450,13 +3462,11 @@ class Shelly(ShellDef["Shelly.Outputs"]): help=""" just a dummy name """, - mandatory=True, ) in2: str = shell.arg( help=""" just a dummy name """, - mandatory=True, ) together: ty.List = shell.arg( help=""" @@ -3471,11 +3481,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert shelly.cmdline == "exec -t [i1, i2]" # testing that the formatter can overwrite a provided value for together. - shelly = Shelly( - in1="i1", - in2="i2", - together=[1], - ) + shelly = Shelly(in1="i1", in2="i2", together=[1]) assert shelly.cmdline == "exec -t [i1, i2]" # asking for specific inputs @@ -3539,16 +3545,12 @@ def formatter_1(in1, in2): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = "executable" - in1: str = shell.arg( - help="in1", - ) - in2: str = shell.arg( - help="in2", - ) + in1: str = shell.arg(help="in1") + in2: str = shell.arg(help="in2") together: ty.List = shell.arg( help=""" - uses in1 - """, + uses in1 + """, # When providing a formatter all other metadata options are discarded. 
formatter=formatter_1, ) @@ -3592,7 +3594,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = script_path - in1: str = shell.arg(help="a dummy string", argstr="", mandatory=True) + in1: str = shell.arg(help="a dummy string", argstr="") shelly = Shelly(in1="hello") diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index e5d8cf92cc..3972cbc3d6 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -3,14 +3,10 @@ import attrs import pytest from pydra.engine.specs import ShellOutputs, ShellDef -from pydra.engine.helpers import list_fields from fileformats.generic import File from pydra.design import shell from pydra.utils.typing import MultiInputObj - - -def get_output_names(task): - return sorted(f.name for f in list_fields(task.Outputs)) +from .utils import get_output_names def test_shell_cmd_execargs_1(): diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 47ba21e4ce..4bdae926a7 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -9,6 +9,7 @@ import subprocess as sp import pytest from fileformats.generic import File +from pydra.engine.helpers import list_fields from pydra.engine.specs import ShellDef from ..submitter import Submitter from pydra.design import workflow, python @@ -38,6 +39,10 @@ ) +def get_output_names(task): + return sorted(f.name for f in list_fields(task.Outputs)) + + def run_no_submitter( shell_def: ShellDef, cache_dir: Path | None = None, From 739e8f3ee427a7023bf13789d52ff6b773fa487b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 16:31:20 +1100 Subject: [PATCH 300/342] updating test_workflow to new syntax --- pydra/engine/tests/test_workflow.py | 3273 +++++++++++++-------------- 1 file changed, 1575 insertions(+), 1698 deletions(-) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 1478c417a7..89ca005fcc 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -33,209 +33,121 @@ ListMultSum, DOT_FLAG, ) -from ..submitter import Submitter -from pydra.design import python -from ..specs import ShellDef +from pydra.engine.submitter import Submitter +from pydra.design import python, workflow +from pydra.engine.specs import ShellDef from pydra.utils import exc_info_matches -def test_wf_specinfo_input_spec(): - input_spec = SpecInfo( - name="Input", - fields=[ - ("a", str, "", {"mandatory": True}), - ("b", dict, {"foo": 1, "bar": False}, {"mandatory": False}), - ], - bases=(BaseDef,), - ) - wf = Workflow( - name="workflow", - input_spec=input_spec, - ) - for x in ["a", "b", "_graph_checksums"]: - assert hasattr(wf.inputs, x) - assert wf.inputs.a == "" - assert wf.inputs.b == {"foo": 1, "bar": False} - bad_input_spec = SpecInfo( - name="Input", - fields=[ - ("a", str, {"mandatory": True}), - ], - bases=(ShellDef,), - ) - with pytest.raises( - ValueError, match="Provided SpecInfo must have BaseDef as its base." 
- ): - Workflow(name="workflow", input_spec=bad_input_spec) - - -def test_wf_dict_input_and_output_spec(): - definition = { - "a": str, - "b": ty.Dict[str, ty.Union[int, bool]], - } - wf = Workflow( - name="workflow", - input_spec=definition, - output_spec=definition, - ) - wf.add( - Identity2Flds( - name="identity", - x1=wf.lzin.a, - x2=wf.lzin.b, - ) - ) - wf.set_output( - [ - ("a", wf.identity.lzout.out1), - ("b", wf.identity.lzout.out2), - ] - ) - for x in ["a", "b", "_graph_checksums"]: - assert hasattr(wf.inputs, x) - wf.inputs.a = "any-string" - wf.inputs.b = {"foo": 1, "bar": False} - - with pytest.raises(TypeError) as exc_info: - wf.inputs.a = 1.0 - assert exc_info_matches(exc_info, "Cannot coerce 1.0 into ") - - with pytest.raises(TypeError) as exc_info: - wf.inputs.b = {"foo": 1, "bar": "bad-value"} - assert exc_info_matches( - exc_info, "Could not coerce object, 'bad-value', to any of the union types" - ) - - outputs = wf() - assert outputs.a == "any-string" - assert outputs.b == {"foo": 1, "bar": False} - - -def test_wf_name_conflict1(): - """raise error when workflow name conflicts with a class attribute or method""" - with pytest.raises(ValueError) as excinfo1: - Workflow(name="result", input_spec=["x"]) - assert "Cannot use names of attributes or methods" in str(excinfo1.value) - with pytest.raises(ValueError) as excinfo2: - Workflow(name="done", input_spec=["x"]) - assert "Cannot use names of attributes or methods" in str(excinfo2.value) - - -def test_wf_name_conflict2(): - """raise error when a task with the same name is already added to workflow""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="task_name", x=wf.lzin.x)) - with pytest.raises(ValueError) as excinfo: - wf.add(Identity(name="task_name", x=3)) - assert "Another task named task_name is already added" in str(excinfo.value) - - def test_wf_no_output(plugin, tmpdir): """Raise error when output isn't set with set_output""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.inputs.x = 2 + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + + wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "Workflow output cannot be None" in str(excinfo.value) def test_wf_1(plugin, tmpdir): """workflow with one task and no splitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=2) checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + + assert 4 == results.outputs.out def test_wf_1a_outpastuple(plugin, tmpdir): """workflow with one task and no splitter set_output takes a tuple """ - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output(("out", wf.add2.lzout.out)) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out - results = 
wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert 4 == results.outputs.out def test_wf_1_call_subm(plugin, tmpdir): """using wf.__call_ with submitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: wf(submitter=sub) - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + assert 4 == results.outputs.out def test_wf_1_call_plug(plugin, tmpdir): """using wf.__call_ with plugin""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=2) wf(plugin=plugin) - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + assert 4 == results.outputs.out def test_wf_1_call_noplug_nosubm(plugin, tmpdir): """using wf.__call_ without plugin or submitter""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir - wf() - results = wf.result() - assert 4 == results.output.out - assert wf.output_dir.exists() + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=2) + + outputs = wf() + + assert 4 == results.outputs.out def test_wf_1_call_exception(plugin, tmpdir): """using wf.__call_ with plugin and submitter - should raise an exception""" - wf = Workflow(name="wf_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: with pytest.raises(Exception) as e: wf(submitter=sub, plugin=plugin) assert "Defify submitter OR plugin" in str(e.value) @@ -243,62 +155,64 @@ def test_wf_1_call_exception(plugin, tmpdir): def test_wf_1_inp_in_call(tmpdir): """Defining input in __call__""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 1 + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=1) results = wf(x=2) - assert 4 == results.output.out + assert 4 == results.outputs.out def test_wf_1_upd_in_run(tmpdir): """Updating input in __call__""" - wf = Workflow(name="wf_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 1 + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow(x=1) results = wf(x=2) - assert 4 == results.output.out + assert 4 == results.outputs.out def test_wf_2(plugin, tmpdir): """workflow with 2 tasks, no 
splitter""" - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert 8 == results.outputs.out def test_wf_2a(plugin, tmpdir): """workflow with 2 tasks, no splitter creating add2_task first (before calling add method), """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = Add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out - results = wf.result() - assert 8 == results.output.out - assert wf.output_dir.exists() + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert 8 == results.outputs.out def test_wf_2b(plugin, tmpdir): @@ -306,92 +220,79 @@ def test_wf_2b(plugin, tmpdir): creating add2_task first (before calling add method), adding inputs.x after add method """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = Add2(name="add2") - wf.add(add2_task) - add2_task.inputs.x = wf.mult.lzout.out - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=2, y=3) - results = wf.result() - assert 8 == results.output.out + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() + assert 8 == results.outputs.out def test_wf_2c_multoutp(plugin, tmpdir): """workflow with 2 tasks, no splitter setting multiple outputs for the workflow """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = Add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - # setting multiple output (from both nodes) - wf.set_output([("out_add2", wf.add2.lzout.out), ("out_mult", wf.mult.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - results = wf.result() + + @workflow.define(outputs=["out_add2", "out_mult"]) + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out, mult.out + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + # checking outputs from both nodes - assert 6 == results.output.out_mult - assert 8 == results.output.out_add2 - assert wf.output_dir.exists() + assert 6 == results.outputs.out_mult + assert 8 == 
results.outputs.out_add2 def test_wf_2d_outpasdict(plugin, tmpdir): """workflow with 2 tasks, no splitter setting multiple outputs using a dictionary """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - add2_task = Add2(name="add2") - add2_task.inputs.x = wf.mult.lzout.out - wf.add(add2_task) - # setting multiple output (from both nodes) - wf.set_output({"out_add2": wf.add2.lzout.out, "out_mult": wf.mult.lzout.out}) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - results = wf.result() + + @workflow.define(outputs=["out_add2", "out_mult"]) + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out, mult.out + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + # checking outputs from both nodes - assert 6 == results.output.out_mult - assert 8 == results.output.out_add2 - assert wf.output_dir.exists() + assert 6 == results.outputs.out_mult + assert 8 == results.outputs.out_add2 @pytest.mark.flaky(reruns=3) # when dask def test_wf_3(plugin_dask_opt, tmpdir): """testing None value for an input""" - wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(FunAddVarNone(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = None - wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x, y): + addvar = workflow.add(FunAddVarNone(a=x, b=y)) + add2 = workflow.add(Add2(x=addvar.out)) + return add2.out + + wf = Workflow(x=2, y=None) with Submitter(worker=plugin_dask_opt) as sub: - sub(wf) + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 4 == results.output.out + assert 4 == results.outputs.out @pytest.mark.xfail(reason="the task error doesn't propagate") @@ -399,36 +300,36 @@ def test_wf_3a_exception(plugin, tmpdir): """testinh wf without set input, attr.NOTHING should be set and the function should raise an exception """ - wf = Workflow(name="wf_3", input_spec=["x", "y"]) - wf.add(FunAddVarNone(name="addvar", a=wf.lzin.x, b=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = attr.NOTHING - wf.plugin = plugin - wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x, y): + addvar = workflow.add(FunAddVarNone(a=x, b=y)) + add2 = workflow.add(Add2(x=addvar.out)) + return add2.out + + wf = Workflow(x=2, y=attr.NOTHING) with pytest.raises(TypeError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "unsupported" in str(excinfo.value) def test_wf_4(plugin, tmpdir): """wf with a task that doesn't set one input and use the function default value""" - wf = Workflow(name="wf_4", input_spec=["x", "y"]) - wf.add(FunAddVarDefault(name="addvar", a=wf.lzin.x)) - wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + addvar = workflow.add(FunAddVarDefault(a=x)) + add2 = workflow.add(Add2(x=addvar.out)) + return add2.out + + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() 
- results = wf.result() - assert 5 == results.output.out + assert 5 == results.outputs.out def test_wf_4a(plugin, tmpdir): @@ -436,121 +337,127 @@ def test_wf_4a(plugin, tmpdir): the unset input is send to the task input, so the task should use the function default value """ - wf = Workflow(name="wf_4a", input_spec=["x", "y"]) - wf.add(FunAddVarDefault(name="addvar", a=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.addvar.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + addvar = workflow.add(FunAddVarDefault(a=x, y=y)) + add2 = workflow.add(Add2(x=addvar.out)) + return add2.out + + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 5 == results.output.out + assert 5 == results.outputs.out def test_wf_5(plugin, tmpdir): """wf with two outputs connected to the task outputs one set_output """ - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out_sum", wf.addsub.lzout.sum), ("out_sub", wf.addsub.lzout.sub)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_sum", "out_sub"]) + def Workflow(x, y): + addsub = workflow.add(FunAddSubVar(a=x, b=y)) + return addsub.sum, addsub.sub - results = wf.result() - assert 5 == results.output.out_sum - assert 1 == results.output.out_sub + wf = Workflow(x=3, y=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert 5 == results.outputs.out_sum + assert 1 == results.outputs.out_sub def test_wf_5a(plugin, tmpdir): """wf with two outputs connected to the task outputs, set_output set twice """ - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out_sum", wf.addsub.lzout.sum)]) - wf.set_output([("out_sub", wf.addsub.lzout.sub)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + addsub = workflow.add(FunAddSubVar(a=x, b=y)) + return addsub.sum # out_sum + return addsub.sub # out_sub + + wf = Workflow(x=3, y=2) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert 5 == results.output.out_sum - assert 1 == results.output.out_sub + assert 5 == results.outputs.out_sum + assert 1 == results.outputs.out_sub def test_wf_5b_exception(tmpdir): """set_output used twice with the same name - exception should be raised""" - wf = Workflow(name="wf_5", input_spec=["x", "y"], x=3, y=2) - wf.add(FunAddSubVar(name="addsub", a=wf.lzin.x, b=wf.lzin.y)) - wf.set_output([("out", wf.addsub.lzout.sum)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + addsub = workflow.add(FunAddSubVar(a=x, b=y)) + return addsub.sum + + wf = Workflow(x=3, y=2) with pytest.raises(Exception, match="are already set"): - wf.set_output([("out", wf.addsub.lzout.sub)]) + return addsub.sub def test_wf_6(plugin, tmpdir): """wf with two tasks and two outputs connected to both tasks, one set_output """ - wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out1", wf.mult.lzout.out), ("out2", 
wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return mult.out, add2.out # (outputs=["out1", "out2"]) + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 6 == results.output.out1 - assert 8 == results.output.out2 + assert 6 == results.outputs.out1 + assert 8 == results.outputs.out2 def test_wf_6a(plugin, tmpdir): """wf with two tasks and two outputs connected to both tasks, set_output used twice """ - wf = Workflow(name="wf_6", input_spec=["x", "y"], x=2, y=3) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out1", wf.mult.lzout.out)]) - wf.set_output([("out2", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out1", "out2"]) + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return mult.out, add2.out - assert wf.output_dir.exists() - results = wf.result() - assert 6 == results.output.out1 - assert 8 == results.output.out2 + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert 6 == results.outputs.out1 + assert 8 == results.outputs.out2 def test_wf_st_1(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + + return add2.out checksum_before = wf.checksum - with Submitter(worker="serial") as sub: - sub(wf) + with Submitter(cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -562,17 +469,16 @@ def test_wf_st_1(plugin, tmpdir): def test_wf_st_1_call_subm(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + + return add2.out - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: wf(submitter=sub) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -586,16 +492,15 @@ def test_wf_st_1_call_plug(plugin, tmpdir): """Workflow with one task, a splitter for the workflow using Workflow.__call__(plugin) """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + + return add2.out wf(plugin=plugin) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert 
results[1].output.out == 4 @@ -609,16 +514,14 @@ def test_wf_st_1_call_selfplug(plugin, tmpdir): """Workflow with one task, a splitter for the workflow using Workflow.__call__() and using self.plugin """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + return add2.out + + outputs = wf()() - wf() - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -633,15 +536,14 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): using Workflow.__call__() without plugin and submitter (a submitter should be created within the __call__ function) """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + return add2.out + + outputs = wf()() - wf() - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -653,11 +555,13 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): def test_wf_st_1_inp_in_call(tmpdir): """Defining input in __call__""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( - "x", x=[1, 2] - ) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow().split("x", x=[1, 2]) results = wf() assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -665,11 +569,13 @@ def test_wf_st_1_inp_in_call(tmpdir): def test_wf_st_1_upd_inp_call(tmpdir): """Updating input in __call___""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( - "x", x=[11, 22] - ) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.set_output([("out", wf.add2.lzout.out)]) + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wf = Workflow().split("x", x=[11, 22]) results = wf(x=[1, 2]) assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -677,20 +583,18 @@ def test_wf_st_1_upd_inp_call(tmpdir): def test_wf_st_noinput_1(plugin, tmpdir): """Workflow with one task, a splitter for the workflow""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x).split("x", x=[])) + return add2.out checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() + assert results == [] # checking all directories assert wf.output_dir == [] @@ -698,65 +602,58 @@ def test_wf_st_noinput_1(plugin, tmpdir): def test_wf_ndst_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.add2.lzout.out)]) - 
wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2().split("x", x=x)) + return add2.out + + wf = Workflow(x=[1, 2]) checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + assert results.outputs.out == [3, 4] def test_wf_ndst_updatespl_1(plugin, tmpdir): """workflow with one task, a splitter on the task level is added *after* calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2")) - wf.inputs.x = [1, 2] - wf.add2.split("x", x=wf.lzin.x) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(name="add2").split("x", x=x)) + return add2.out - results = wf.result() - # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + wf = Workflow(x=[1, 2]) - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] + assert results.outputs.out == [3, 4] def test_wf_ndst_updatespl_1a(plugin, tmpdir): """workflow with one task (initialize before calling add), a splitter on the task level is added *after* calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - task_add2 = Add2(name="add2", x=wf.lzin.x) - wf.add(task_add2) - task_add2.split("x", x=[1, 2]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + task_add2 = workflow.add(Add2(name="add2", x=x).split("x", x=[1, 2])) + return add2.out - results = wf.result() - # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] + assert results.outputs.out == [3, 4] def test_wf_ndst_updateinp_1(plugin, tmpdir): @@ -764,56 +661,53 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): a splitter on the task level, updating input of the task after calling add """ - wf = Workflow(name="wf_spl_1", input_spec=["x", "y"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.add2.split("x", x=wf.lzin.y) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2 = workflow.add(Add2(x=x).split("x", x=y)) + return add2.out + + wf = Workflow(x=[1, 2], y=[11, 12]) - results = wf.result() - assert results.output.out == [13, 14] - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() + assert results.outputs.out == [13, 14] def test_wf_ndst_noinput_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" - wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.inputs.x = [] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + + @workflow.define 
+ def Workflow(x): + add2 = workflow.add(Add2().split("x", x=x)) + return add2.out + + wf = Workflow(x=[]) checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() - assert results.output.out == [] - assert wf.output_dir.exists() + assert results.outputs.out == [] def test_wf_st_2(plugin, tmpdir): """workflow with one task, splitters and combiner for workflow""" - wf = Workflow(name="wf_st_2", input_spec=["x"]) - wf.add(Add2(name="add2", x=wf.lzin.x)) - wf.split("x", x=[1, 2]).combine(combiner="x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2(x=x)) + + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow().split("x", x=[1, 2]).combine(combiner="x") + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results[0].output.out == 3 assert results[1].output.out == 4 @@ -825,19 +719,19 @@ def test_wf_st_2(plugin, tmpdir): def test_wf_ndst_2(plugin, tmpdir): """workflow with one task, splitters and combiner on the task level""" - wf = Workflow(name="wf_ndst_2", input_spec=["x"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x).combine(combiner="x")) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2().split("x", x=x).combine(combiner="x")) + return add2.out + + wf = Workflow(x=[1, 2]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.output.out == [3, 4] - assert wf.output_dir.exists() + assert results.outputs.out == [3, 4] # workflows with structures A -> B @@ -845,15 +739,18 @@ def test_wf_ndst_2(plugin, tmpdir): def test_wf_st_3(plugin, tmpdir): """workflow with 2 tasks, splitter on wf level""" - wf = Workflow(name="wfst_3", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.split(("x", "y"), x=[1, 2], y=[11, 12]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + + return add2.out + + wf = Workflow().split(("x", "y"), x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) expected = [ ({"wfst_3.x": 1, "wfst_3.y": 11}, 13), @@ -864,7 +761,6 @@ def test_wf_st_3(plugin, tmpdir): ({"wfst_3.x": 1, "wfst_3.y": 1}, 26), ] - results = wf.result() for i, res in enumerate(expected): assert results[i].output.out == res[1] @@ -890,39 +786,36 @@ def test_wf_st_3(plugin, tmpdir): def test_wf_ndst_3(plugin, tmpdir): """Test workflow with 2 tasks, splitter on a task level""" - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - 
results = wf.result() + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + # expected: [({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26)] - assert results.output.out == [13, 26] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [13, 26] def test_wf_st_4(plugin, tmpdir): """workflow with two tasks, scalar splitter and combiner for the workflow""" - wf = Workflow(name="wf_st_4", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.split(("x", "y"), x=[1, 2], y=[11, 12]) - wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow().split(("x", "y"), x=[1, 2], y=[11, 12]).combine("x") + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] @@ -936,41 +829,38 @@ def test_wf_st_4(plugin, tmpdir): def test_wf_ndst_4(plugin, tmpdir): """workflow with two tasks, scalar splitter and combiner on tasks level""" - wf = Workflow(name="wf_ndst_4", input_spec=["a", "b"]) - wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.a, y=wf.lzin.b)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - wf.inputs.a = [1, 2] - wf.inputs.b = [11, 12] + @workflow.define + def Workflow(a, b): + mult = workflow.add(Multiply().split(("x", "y"), x=a, y=b)) + add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) - with Submitter(worker=plugin) as sub: - sub(wf) + return add2.out + + wf = Workflow(a=[1, 2], b=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results.output.out == [13, 26] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [13, 26] def test_wf_st_5(plugin, tmpdir): """workflow with two tasks, outer splitter and no combiner""" - wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.split(["x", "y"], x=[1, 2], y=[11, 12]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=[1, 2], y=[11, 12])) + + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert results[0].output.out == 13 assert results[1].output.out == 14 assert results[2].output.out == 24 @@ -983,41 +873,39 @@ def test_wf_st_5(plugin, tmpdir): def test_wf_ndst_5(plugin, tmpdir): """workflow with two tasks, outer splitter on tasks level and no combiner""" - wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - 
wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - results = wf.result() - assert results.output.out[0] == 13 - assert results.output.out[1] == 14 - assert results.output.out[2] == 24 - assert results.output.out[3] == 26 - # checking the output directory - assert wf.output_dir.exists() + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out[0] == 13 + assert results.outputs.out[1] == 14 + assert results.outputs.out[2] == 24 + assert results.outputs.out[3] == 26 def test_wf_st_6(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner for the workflow""" - wf = Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + + return add2.out + + wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert results[0][0].output.out == 13 assert results[0][1].output.out == 24 assert results[0][2].output.out == 35 @@ -1032,90 +920,73 @@ def test_wf_st_6(plugin, tmpdir): def test_wf_ndst_6(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner on tasks level""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + return add2.out - results = wf.result() - assert results.output.out[0] == [13, 24, 35] - assert results.output.out[1] == [14, 26, 38] + wf = Workflow(x=[1, 2, 3], y=[11, 12]) - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out[0] == [13, 24, 35] + assert results.outputs.out[1] == [14, 26, 38] def test_wf_ndst_7(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first node only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split("x", x=wf.lzin.x, y=wf.lzin.y).combine("x")) - wf.add(Identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = 11 - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split("x", x=x, 
y=y).combine("x")) + iden = workflow.add(Identity(x=mult.out)) + return iden.out + + wf = Workflow(x=[1, 2, 3], y=11) - results = wf.result() - assert results.output.out == [11, 22, 33] + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [11, 22, 33] def test_wf_ndst_8(plugin, tmpdir): """workflow with two tasks, outer splitter and (partial) combiner for first task only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add( - Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(Identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) + iden = workflow.add(Identity(x=mult.out)) + + return iden.out + + wf = Workflow(x=[1, 2, 3], y=[11, 12]) - results = wf.result() - assert results.output.out[0] == [11, 22, 33] - assert results.output.out[1] == [12, 24, 36] + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out[0] == [11, 22, 33] + assert results.outputs.out[1] == [12, 24, 36] def test_wf_ndst_9(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first task only""" - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add( - Multiply(name="mult") - .split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y) - .combine(["x", "y"]) - ) - wf.add(Identity(name="iden", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.iden.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine(["x", "y"])) + iden = workflow.add(Identity(x=mult.out)) + return iden.out + + wf = Workflow(x=[1, 2, 3], y=[11, 12]) - results = wf.result() - assert results.output.out == [11, 12, 22, 24, 33, 36] + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [11, 12, 22, 24, 33, 36] # workflows with structures A -> B -> C @@ -1123,17 +994,18 @@ def test_wf_ndst_9(plugin, tmpdir): def test_wf_3sernd_ndst_1(plugin, tmpdir): """workflow with three "serial" tasks, checking if the splitter is propagating""" - wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2_1st", x=wf.mult.lzout.out)) - wf.add(Add2(name="add2_2nd", x=wf.add2_1st.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2_2nd.lzout.out)]) - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + add2_1st = workflow.add(Add2(x=mult.out)) + add2_2nd = workflow.add(Add2(x=add2_1st.out)) + return add2_2nd.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # splitter from the first task should propagate to all tasks, # splitter_rpn should be the same 
in all tasks @@ -1147,13 +1019,10 @@ def test_wf_3sernd_ndst_1(plugin, tmpdir): == wf.add2_2nd.state.splitter_rpn ) - results = wf.result() - assert results.output.out[0] == 15 - assert results.output.out[1] == 16 - assert results.output.out[2] == 26 - assert results.output.out[3] == 28 - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out[0] == 15 + assert results.outputs.out[1] == 16 + assert results.outputs.out[2] == 26 + assert results.outputs.out[3] == 28 def test_wf_3sernd_ndst_1a(plugin, tmpdir): @@ -1162,17 +1031,18 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): first task has a splitter that propagates to the 2nd task, and the 2nd task is adding one more input to the splitter """ - wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(Add2(name="add2_1st").split("x", x=wf.lzin.x)) - wf.add(Multiply(name="mult", x=wf.add2_1st.lzout.out).split("y", y=wf.lzin.y)) - wf.add(Add2(name="add2_2nd", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.add2_2nd.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2_1st = workflow.add(Add2().split("x", x=x)) + mult = workflow.add(Multiply(x=add2_1st.out).split("y", y=y)) + add2_2nd = workflow.add(Add2(x=mult.out)) + return add2_2nd.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # splitter from the 1st task should propagate and the 2nd task should add one more # splitter_rpn for the 2nd and the 3rd task should be the same @@ -1185,13 +1055,10 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): == wf.add2_2nd.state.splitter_rpn ) - results = wf.result() - assert results.output.out[0] == 35 - assert results.output.out[1] == 38 - assert results.output.out[2] == 46 - assert results.output.out[3] == 50 - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out[0] == 35 + assert results.outputs.out[1] == 38 + assert results.outputs.out[2] == 46 + assert results.outputs.out[3] == 50 # workflows with structures A -> C, B -> C @@ -1202,19 +1069,20 @@ def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter on the workflow level """ - wf = Workflow(name="wf_st_7", input_spec=["x", "y"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + + return mult.out + + wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]) with Submitter(worker=plugin_dask_opt) as sub: - sub(wf) + results = sub(wf) - results = wf.result() assert len(results) == 6 assert results[0].output.out == 39 assert results[1].output.out == 42 @@ -1230,42 +1098,40 @@ def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter on the tasks levels """ - wf = Workflow(name="wf_ndst_7", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, 
y=wf.add2y.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out + + wf = Workflow(x=[1, 2, 3], y=[11, 12]) with Submitter(worker=plugin_dask_opt) as sub: - sub(wf) + results = sub(wf) - results = wf.result() - assert len(results.output.out) == 6 - assert results.output.out == [39, 42, 52, 56, 65, 70] - # checking the output directory - assert wf.output_dir.exists() + assert len(results.outputs.out) == 6 + assert results.outputs.out == [39, 42, 52, 56, 65, 70] def test_wf_3nd_st_2(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the workflow level """ - wf = Workflow(name="wf_st_8", input_spec=["x", "y"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert len(results) == 2 assert results[0][0].output.out == 39 assert results[0][1].output.out == 52 @@ -1283,47 +1149,41 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the tasks levels """ - wf = Workflow(name="wf_ndst_8", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - "add2x.x" - ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker="serial") as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2x.x")) + return mult.out - results = wf.result() - assert len(results.output.out) == 2 - assert results.output.out[0] == [39, 52, 65] - assert results.output.out[1] == [42, 56, 70] - # checking the output directory - assert wf.output_dir.exists() + wf = Workflow(x=[1, 2, 3], y=[11, 12]) + + with Submitter(cache_dir=tmpdir) as sub: + results = sub(wf) + + assert len(results.outputs.out) == 2 + assert results.outputs.out[0] == [39, 52, 65] + assert results.outputs.out[1] == [42, 56, 70] def test_wf_3nd_st_3(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the workflow level """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], 
y=[11, 12]).combine("y") - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out + + wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert len(results) == 3 assert results[0][0].output.out == 39 assert results[0][1].output.out == 42 @@ -1341,48 +1201,42 @@ def test_wf_3nd_ndst_3(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the tasks levels """ - wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - "add2y.x" - ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2y.x")) + return mult.out - results = wf.result() - assert len(results.output.out) == 3 - assert results.output.out[0] == [39, 42] - assert results.output.out[1] == [52, 56] - assert results.output.out[2] == [65, 70] - # checking the output directory - assert wf.output_dir.exists() + wf = Workflow(x=[1, 2, 3], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert len(results.outputs.out) == 3 + assert results.outputs.out[0] == [39, 42] + assert results.outputs.out[1] == [52, 56] + assert results.outputs.out[2] == [65, 70] def test_wf_3nd_st_4(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the workflow level """ - wf = Workflow(name="wf_st_10", input_spec=["x", "y"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out + + wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert len(results) == 6 assert results[0].output.out == 39 assert results[1].output.out == 42 @@ -1400,52 +1254,43 @@ def test_wf_3nd_ndst_4(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the tasks levels """ - wf = Workflow(name="wf_ndst_10", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( - ["add2x.x", 
"add2y.x"] + + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine(["add2x.x", "add2y.x"]) ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=[1, 2, 3], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() - assert len(results.output.out) == 6 - assert results.output.out == [39, 42, 52, 56, 65, 70] - # checking the output directory - assert wf.output_dir.exists() + assert len(results.outputs.out) == 6 + assert results.outputs.out == [39, 42, 52, 56, 65, 70] def test_wf_3nd_st_5(plugin, tmpdir): """workflow with three tasks (A->C, B->C) and three fields in the splitter, splitter and partial combiner (from the second task) on the workflow level """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add( - FunAddVar3( - name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out, c=wf.lzin.z - ) - ) - wf.split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y, z): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + addvar = workflow.add(FunAddVar3(a=add2x.out, b=add2y.out, c=z)) + return addvar.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow.split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert len(results) == 4 assert results[0][0].output.out == 27 assert results[0][1].output.out == 28 @@ -1466,84 +1311,75 @@ def test_wf_3nd_ndst_5(plugin, tmpdir): """workflow with three tasks (A->C, B->C) and three fields in the splitter, all tasks have splitters and the last one has a partial combiner (from the 2nd) """ - wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - FunAddVar3(name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out) - .split("c", c=wf.lzin.z) - .combine("add2x.x") - ) - wf.inputs.x = [2, 3] - wf.inputs.y = [11, 12] - wf.inputs.z = [10, 100] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y, z): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + addvar = workflow.add( + FunAddVar3(a=add2x.out, b=add2y.out).split("c", c=z).combine("add2x.x") + ) + + return addvar.out + + wf = Workflow(x=[2, 3], y=[11, 12], z=[10, 100]) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert len(results.output.out) == 4 - assert results.output.out[0] == [27, 28] - assert results.output.out[1] == [117, 118] - assert results.output.out[2] == [28, 29] - assert results.output.out[3] == [118, 119] + assert len(results.outputs.out) == 4 + assert results.outputs.out[0] == [27, 28] + assert results.outputs.out[1] == [117, 118] + assert results.outputs.out[2] == [28, 29] + assert 
results.outputs.out[3] == [118, 119] # checking all directories - assert wf.output_dir.exists() def test_wf_3nd_ndst_6(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones and a combiner """ - wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.y)) - wf.add( - Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out) - .split(("_add2x", "_add2y")) - .combine("add2y.x") - ) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=y)) + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out) + .split(("_add2x", "_add2y")) + .combine("add2y.x") + ) + return mult.out + + wf = Workflow(x=[1, 2], y=[11, 12]) - results = wf.result() - assert results.output.out == [39, 56] - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == [39, 56] def test_wf_3nd_ndst_7(plugin, tmpdir): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones """ - wf = Workflow(name="wf_ndst_9", input_spec=["x"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y").split("x", x=wf.lzin.x)) - wf.add( - Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).split( - ("_add2x", "_add2y") + + @workflow.define + def Workflow(x): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = workflow.add(Add2().split("x", x=x)) + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).split(("_add2x", "_add2y")) ) - ) - wf.inputs.x = [1, 2] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + return mult.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=[1, 2]) - results = wf.result() - assert results.output.out == [9, 16] - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == [9, 16] # workflows with structures A -> B -> C with multiple connections @@ -1551,38 +1387,29 @@ def test_wf_3nd_ndst_7(plugin, tmpdir): def test_wf_3nd_8(tmpdir): """workflow with three tasks A->B->C vs two tasks A->C with multiple connections""" - wf = Workflow(name="wf", input_spec=["zip"], cache_dir=tmpdir) - wf.inputs.zip = [["test1", "test3", "test5"], ["test2", "test4", "test6"]] - wf.add(Identity2Flds(name="iden2flds_1", x2="Hoi").split("x1", x1=wf.lzin.zip)) + @workflow.define(outputs=["out1", "out2", "out1a", "out2a"]) + def Workflow(zip): - wf.add(Identity(name="identity", x=wf.iden2flds_1.lzout.out1)) + iden2flds_1 = workflow.add(Identity2Flds(x2="Hoi").split("x1", x1=zip)) - wf.add( - Identity2Flds( - name="iden2flds_2", x1=wf.identity.lzout.out, x2=wf.iden2flds_1.lzout.out2 - ) - ) + identity = workflow.add(Identity(x=iden2flds_1.out1)) + + iden2flds_2 = workflow.add(Identity2Flds(x1=identity.out, x2=iden2flds_1.out2)) - wf.add( - Identity2Flds( - name="iden2flds_2a", - x1=wf.iden2flds_1.lzout.out1, - x2=wf.iden2flds_1.lzout.out2, + iden2flds_2a = workflow.add( + Identity2Flds( + x1=iden2flds_1.out1, + 
x2=iden2flds_1.out2, + ) ) - ) - wf.set_output( - [ - ("out1", wf.iden2flds_2.lzout.out1), - ("out2", wf.iden2flds_2.lzout.out2), - ("out1a", wf.iden2flds_2a.lzout.out1), - ("out2a", wf.iden2flds_2a.lzout.out2), - ] - ) + return iden2flds_2.out1, iden2flds_2.out2, iden2flds_2a.out1, iden2flds_2a.out2 + + wf = Workflow(zip=[["test1", "test3", "test5"], ["test2", "test4", "test6"]]) with Submitter(worker="cf") as sub: - sub(wf) + results = sub(wf) res = wf.result() @@ -1602,27 +1429,25 @@ def test_wf_ndstLR_1(plugin, tmpdir): The second task has its own simple splitter and the Left part from the first task should be added """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.add(Multiply(name="mult", x=wf.add2.lzout.out).split("y", y=wf.lzin.y)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2 = workflow.add(Add2().split("x", x=x)) + mult = workflow.add(Multiply(x=add2.out).split("y", y=y)) + return mult.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # checking if the splitter is created properly assert wf.mult.state.splitter == ["_add2", "mult.y"] assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert results.output.out == [33, 36, 44, 48] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [33, 36, 44, 48] def test_wf_ndstLR_1a(plugin, tmpdir): @@ -1630,29 +1455,25 @@ def test_wf_ndstLR_1a(plugin, tmpdir): The second task has splitter that has Left part (from previous state) and the Right part (it's own splitter) """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - Multiply(name="mult").split(["_add2", "y"], x=wf.add2.lzout.out, y=wf.lzin.y) - ) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2 = workflow.add(Add2().split("x", x=x)) + mult = workflow.add(Multiply().split(["_add2", "y"], x=add2.out, y=y)) + return mult.out + + wf = Workflow(x=[1, 2], y=[11, 12]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # checking if the splitter is created properly assert wf.mult.state.splitter == ["_add2", "mult.y"] assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert results.output.out == [33, 36, 44, 48] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [33, 36, 44, 48] def test_wf_ndstLR_2(plugin, tmpdir): @@ -1660,33 +1481,28 @@ def test_wf_ndstLR_2(plugin, tmpdir): The second task has its own outer splitter and the Left part from the first task should be added """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - FunAddVar3(name="addvar", a=wf.add2.lzout.out).split( - ["b", 
"c"], b=wf.lzin.y, c=wf.lzin.z - ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [10, 20] - wf.inputs.z = [100, 200] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y, z): + add2 = workflow.add(Add2().split("x", x=x)) + addvar = workflow.add(FunAddVar3(a=add2.out).split(["b", "c"], b=y, c=z)) + return addvar.out + + wf = Workflow(x=[1, 2, 3], y=[10, 20], z=[100, 200]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # checking if the splitter is created properly assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] - assert results.output.out == [ + assert results.outputs.out == [ 113, 213, 123, @@ -1700,8 +1516,6 @@ def test_wf_ndstLR_2(plugin, tmpdir): 125, 225, ] - # checking the output directory - assert wf.output_dir.exists() def test_wf_ndstLR_2a(plugin, tmpdir): @@ -1709,33 +1523,31 @@ def test_wf_ndstLR_2a(plugin, tmpdir): The second task has splitter that has Left part (from previous state) and the Right part (it's own outer splitter) """ - wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(Add2(name="add2").split("x", x=wf.lzin.x)) - wf.add( - FunAddVar3(name="addvar", a=wf.add2.lzout.out).split( - ["_add2", ["b", "c"]], b=wf.lzin.y, c=wf.lzin.z + + @workflow.define + def Workflow(x, y, z): + add2 = workflow.add(Add2().split("x", x=x)) + addvar = workflow.add( + FunAddVar3(a=add2.out).split(["_add2", ["b", "c"]], b=y, c=z) ) - ) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = [10, 20] - wf.inputs.z = [100, 200] - wf.set_output([("out", wf.addvar.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + return addvar.out + + wf = Workflow(x=[1, 2, 3], y=[10, 20], z=[100, 200]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # checking if the splitter is created properly assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] - results = wf.result() # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] 
- assert results.output.out == [ + assert results.outputs.out == [ 113, 213, 123, @@ -1749,8 +1561,6 @@ def test_wf_ndstLR_2a(plugin, tmpdir): 125, 225, ] - # checking the output directory - assert wf.output_dir.exists() # workflows with inner splitters A -> B (inner spl) @@ -1760,74 +1570,69 @@ def test_wf_ndstinner_1(plugin, tmpdir): """workflow with 2 tasks, the second task has inner splitter """ - wf = Workflow(name="wf_st_3", input_spec={"x": int}) - wf.add(ListOutput(name="list", x=wf.lzin.x)) - wf.add(Add2(name="add2").split("x", x=wf.list.lzout.out)) - wf.inputs.x = 1 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Workflow(x: int): + list = workflow.add(ListOutput(x=x)) + add2 = workflow.add(Add2().split("x", x=list.out)) + return list.out, add2.out + + wf = Workflow(x=1) # + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.add2.state.splitter == "add2.x" assert wf.add2.state.splitter_rpn == ["add2.x"] - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [3, 4, 5] - - assert wf.output_dir.exists() + assert results.outputs.out_list == [1, 2, 3] + assert results.outputs.out == [3, 4, 5] def test_wf_ndstinner_2(plugin, tmpdir): """workflow with 2 tasks, the second task has two inputs and inner splitter from one of the input """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(ListOutput(name="list", x=wf.lzin.x)) - wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) - wf.inputs.x = 1 - wf.inputs.y = 10 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define(outputs=["out_list", "out"]) + def Workflow(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out)) + return list.out, mult.out + + wf = Workflow(x=1, y=10) # + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.mult.state.splitter == "mult.x" assert wf.mult.state.splitter_rpn == ["mult.x"] - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [10, 20, 30] - - assert wf.output_dir.exists() + assert results.outputs.out_list == [1, 2, 3] + assert results.outputs.out == [10, 20, 30] def test_wf_ndstinner_3(plugin, tmpdir): """workflow with 2 tasks, the second task has two inputs and outer splitter that includes an inner field """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(ListOutput(name="list", x=wf.lzin.x)) - wf.add(Multiply(name="mult").split(["x", "y"], x=wf.list.lzout.out, y=wf.lzin.y)) - wf.inputs.x = 1 - wf.inputs.y = [10, 100] - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply().split(["x", "y"], x=list.out, y=y)) + return list.out + + wf = Workflow(x=1, y=[10, 100]), mult.out # (outputs=["out_list", "out"]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.mult.state.splitter == ["mult.x", "mult.y"] assert wf.mult.state.splitter_rpn == ["mult.x", "mult.y", "*"] - results = wf.result() - assert results.output.out_list == [1, 2, 3] - 
assert results.output.out == [10, 100, 20, 200, 30, 300] - - assert wf.output_dir.exists() + assert results.outputs.out_list == [1, 2, 3] + assert results.outputs.out == [10, 100, 20, 200, 30, 300] def test_wf_ndstinner_4(plugin, tmpdir): @@ -1835,28 +1640,26 @@ def test_wf_ndstinner_4(plugin, tmpdir): the second task has two inputs and inner splitter from one of the input, the third task has no its own splitter """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(ListOutput(name="list", x=wf.lzin.x)) - wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = 1 - wf.inputs.y = 10 - wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + list = workflow.add(ListOutput(x=x)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out)) + add2 = workflow.add(Add2(x=mult.out)) + return list.out + + wf = Workflow(x=1, y=10), add2.out # (outputs=["out_list", "out"]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.mult.state.splitter == "mult.x" assert wf.mult.state.splitter_rpn == ["mult.x"] assert wf.add2.state.splitter == "_mult" assert wf.add2.state.splitter_rpn == ["mult.x"] - results = wf.result() - assert results.output.out_list == [1, 2, 3] - assert results.output.out == [12, 22, 32] - - assert wf.output_dir.exists() + assert results.outputs.out_list == [1, 2, 3] + assert results.outputs.out == [12, 22, 32] def test_wf_ndstinner_5(plugin, tmpdir): @@ -1866,25 +1669,26 @@ def test_wf_ndstinner_5(plugin, tmpdir): there is a inner_cont_dim) the third task has no new splitter """ - wf = Workflow(name="wf_5", input_spec=["x", "y", "b"]) - wf.add(ListOutput(name="list").split("x", x=wf.lzin.x)) - wf.add(Multiply(name="mult").split(["y", "x"], x=wf.list.lzout.out, y=wf.lzin.y)) - wf.add(FunAddVar(name="addvar", a=wf.mult.lzout.out).split("b", b=wf.lzin.b)) - wf.inputs.x = [1, 2] - wf.inputs.y = [10, 100] - wf.inputs.b = [3, 5] + + @workflow.define + def Workflow(x, y, b): + list = workflow.add(ListOutput().split("x", x=x)) + mult = workflow.add(Multiply().split(["y", "x"], x=list.out, y=y)) + addvar = workflow.add(FunAddVar(a=mult.out).split("b", b=b)) + + wf = Workflow(x=[1, 2], y=[10, 100]) + wf = Workflow(b=[3, 5]) wf.set_output( [ - ("out_list", wf.list.lzout.out), - ("out_mult", wf.mult.lzout.out), - ("out_add", wf.addvar.lzout.out), + ("out_list", list.out), + ("out_mult", mult.out), + ("out_add", addvar.out), ] ) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.mult.state.splitter == ["_list", ["mult.y", "mult.x"]] assert wf.mult.state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] @@ -1899,9 +1703,8 @@ def test_wf_ndstinner_5(plugin, tmpdir): "*", ] - results = wf.result() - assert results.output.out_list == [[1, 2, 3], [2, 4, 6]] - assert results.output.out_mult == [ + assert results.outputs.out_list == [[1, 2, 3], [2, 4, 6]] + assert results.outputs.out_mult == [ 10, 20, 30, @@ -1915,7 +1718,7 @@ def test_wf_ndstinner_5(plugin, tmpdir): 400, 600, ] - assert results.output.out_add == [ + assert results.outputs.out_add == [ 13, 15, 23, @@ -1942,27 +1745,25 @@ def test_wf_ndstinner_5(plugin, tmpdir): 605, ] - assert wf.output_dir.exists() - # workflow that have some single values as the input def 
test_wf_st_singl_1(plugin, tmpdir): """workflow with two tasks, only one input is in the splitter and combiner""" - wf = Workflow(name="wf_st_5", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.split("x", x=[1, 2], y=11) - wf.combine("x") - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow().split("x", x=[1, 2], y=11).combine("x") + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert results[0].output.out == 13 assert results[1].output.out == 24 # checking all directories @@ -1975,21 +1776,19 @@ def test_wf_ndst_singl_1(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner on tasks level; only one input is part of the splitter, the other is a single value """ - wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.inputs.x = [1, 2] - wf.inputs.y = 11 - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x)) + add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + return add2.out + + wf = Workflow(x=[1, 2], y=11) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert results.output.out == [13, 24] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [13, 24] def test_wf_st_singl_2(plugin, tmpdir): @@ -1997,19 +1796,19 @@ def test_wf_st_singl_2(plugin, tmpdir): splitter on the workflow level only one input is part of the splitter, the other is a single value """ - wf = Workflow(name="wf_st_6", input_spec=["x", "y"]) - wf.add(Add2(name="add2x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.split("x", x=[1, 2, 3], y=11) - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2(x=x)) + add2y = workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow.split("x", x=[1, 2, 3], y=11) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() assert len(results) == 3 assert results[0].output.out == 39 assert results[1].output.out == 52 @@ -2025,23 +1824,21 @@ def test_wf_ndst_singl_2(plugin, tmpdir): splitter on the tasks levels only one input is part of the splitter, the other is a single value """ - wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(Add2(name="add2x").split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2y", x=wf.lzin.y)) - wf.add(Multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.inputs.y = 11 - wf.set_output([("out", wf.mult.lzout.out)]) - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + add2x = workflow.add(Add2().split("x", x=x)) + add2y = 
workflow.add(Add2(x=y)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + return mult.out + + wf = Workflow(x=[1, 2, 3], y=11) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert len(results.output.out) == 3 - assert results.output.out == [39, 52, 65] - # checking the output directory - assert wf.output_dir.exists() + assert len(results.outputs.out) == 3 + assert results.outputs.out == [39, 52, 65] # workflows with structures wf(A) @@ -2051,23 +1848,23 @@ def test_wfasnd_1(plugin, tmpdir): """workflow as a node workflow-node with one task and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out + + wf = Workflow(x=2) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert results.output.out == 4 - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == 4 def test_wfasnd_wfinp_1(plugin, tmpdir): @@ -2075,25 +1872,26 @@ def test_wfasnd_wfinp_1(plugin, tmpdir): workflow-node with one task and no splitter input set for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"]) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.inputs.x = 2 - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + wf = Workflow(x=2) checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() - assert results.output.out == 4 - # checking the output directory - assert wf.output_dir.exists() + + assert results.outputs.out == 4 def test_wfasnd_wfndupdate(plugin, tmpdir): @@ -2102,22 +1900,22 @@ def test_wfasnd_wfndupdate(plugin, tmpdir): wfasnode input is updated to use the main workflow input """ - wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out - wf = Workflow(name="wf", input_spec=["x"], x=3) - wfnd.inputs.x = wf.lzin.x - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=3) - results = wf.result() - assert results.output.out == 5 - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == 5 def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): @@ -2127,41 +1925,40 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): updated to use the main workflow input """ 
- wfnd = Workflow(name="wfnd", input_spec=["x"], x=2) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + wfnd = Wfnd(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wfnd) - wf = Workflow(name="wf", input_spec=["x"], x=3) - # trying to set before - wfnd.inputs.x = wf.lzin.x - wf.add(wfnd) - # trying to set after add... - wf.wfnd.inputs.x = wf.lzin.x - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x)) + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=3) - results = wf.result() - assert results.output.out == 5 - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == 5 # adding another layer of workflow wf_o = Workflow(name="wf_o", input_spec=["x"], x=4) - wf.inputs.x = wf_o.lzin.x + wf = Workflow(x=wf_o.lzin.x) wf_o.add(wf) wf_o.set_output([("out", wf_o.wf.lzout.out)]) wf_o.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf_o) results = wf_o.result() - assert results.output.out == 6 + assert results.outputs.out == 6 assert wf_o.output_dir.exists() @@ -2170,25 +1967,24 @@ def test_wfasnd_st_1(plugin, tmpdir): workflow-node with one task, splitter for wfnd """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.split("x", x=[2, 4]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x).split("x", x=[2, 4])) + return wfnd.out checksum_before = wf.checksum - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.checksum == checksum_before - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + + assert results.outputs.out == [4, 6] def test_wfasnd_st_updatespl_1(plugin, tmpdir): @@ -2196,23 +1992,23 @@ def test_wfasnd_st_updatespl_1(plugin, tmpdir): workflow-node with one task, splitter for wfnd is set after add """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wfnd.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x).split(x=x)) + return wfnd.out + + wf = Workflow(x=[2, 4]) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + assert results.outputs.out == [4, 6] def test_wfasnd_ndst_1(plugin, tmpdir): @@ -2220,25 +2016,23 @@ def test_wfasnd_ndst_1(plugin, tmpdir): 
workflow-node with one task, splitter for node """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2").split("x", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - # TODO: without this the test is failing - wfnd.plugin = plugin - wfnd.inputs.x = [2, 4] - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2().split("x", x=x)) + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + wf = Workflow(x=[2, 4]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == [4, 6] def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): @@ -2246,23 +2040,23 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): workflow-node with one task, splitter for node added after add """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.add2.split("x", x=[2, 4]) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Workflow(x): + add2 = workflow.add(Add2().split("x", x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=[2, 4]) - results = wf.result() - assert results.output.out == [4, 6] - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == [4, 6] def test_wfasnd_wfst_1(plugin, tmpdir): @@ -2270,19 +2064,23 @@ def test_wfasnd_wfst_1(plugin, tmpdir): workflow-node with one task, splitter for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + wf = Workflow().split("x", x=[2, 4]) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() + assert results[0].output.out == 4 assert results[1].output.out == 6 # checking all directories @@ -2299,24 +2097,25 @@ def test_wfasnd_st_2(plugin, tmpdir): the main workflow has two tasks, splitter for wfnd """ - wfnd = Workflow(name="wfnd", input_spec=["x", "y"]) - wfnd.add(Multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) - wfnd.set_output([("out", wfnd.mult.lzout.out)]) - wfnd.split(("x", "y"), x=[2, 4], y=[1, 10]) - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(wfnd) - wf.add(Add2(name="add2", x=wf.wfnd.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y)) + return 
mult.out + + @workflow.define + def Workflow(x, y): + wfnd = workflow.add(Wfnd(x=x, y=y)) + add2 = workflow.add(Add2(x=wfnd.out)) + return add2.out + + wf = Workflow(x=[2, 4], y=[1, 10]) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() - assert results.output.out == [4, 42] - # checking the output directory - assert wf.output_dir.exists() + + assert results.outputs.out == [4, 42] def test_wfasnd_wfst_2(plugin, tmpdir): @@ -2324,21 +2123,24 @@ def test_wfasnd_wfst_2(plugin, tmpdir): the main workflow has two tasks, splitter for the main workflow """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wfnd = Workflow(name="wfnd", input_spec=["x", "y"], x=wf.lzin.x, y=wf.lzin.y) - wfnd.add(Multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) - wfnd.set_output([("out", wfnd.mult.lzout.out)]) - wf.add(wfnd) - wf.add(Add2(name="add2", x=wf.wfnd.lzout.out)) - wf.split(("x", "y"), x=[2, 4], y=[1, 10]) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + return mult.out + + @workflow.define + def Workflow(x, y): + wfnd = workflow.add(Wfnd(x=x, y=y)) + add2 = workflow.add(Add2(x=wfnd.out)) + return add2.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow().split(("x", "y"), x=[2, 4], y=[1, 10]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() + assert results[0].output.out == 4 assert results[1].output.out == 42 # checking all directories @@ -2355,26 +2157,25 @@ def test_wfasnd_ndst_3(plugin, tmpdir): the main workflow has two tasks, splitter for the first task """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) - wf.inputs.x = [2, 4] - wf.inputs.y = [1, 10] - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y)) + wfnd = workflow.add(Wfnd(mult.out)) + return wfnd.out - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + wf = Workflow(x=[2, 4], y=[1, 10]) - with Submitter(worker="serial") as sub: - sub(wf) + with Submitter(cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() - assert results.output.out == [4, 42] - # checking the output directory - assert wf.output_dir.exists() + + assert results.outputs.out == [4, 42] def test_wfasnd_wfst_3(plugin, tmpdir): @@ -2382,23 +2183,26 @@ def test_wfasnd_wfst_3(plugin, tmpdir): the main workflow has two tasks, splitter for the main workflow """ - wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.split(("x", "y"), x=[2, 4], y=[1, 10]) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + + wfnd = 
workflow.add(Wfnd(mult.out)) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.plugin = plugin - wf.cache_dir = tmpdir + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow().split(("x", "y"), x=[2, 4], y=[1, 10]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) # assert wf.output_dir.exists() - results = wf.result() + assert results[0].output.out == 4 assert results[1].output.out == 42 # checking all directories @@ -2414,24 +2218,24 @@ def test_wfasnd_4(plugin, tmpdir): """workflow as a node workflow-node with two tasks and no splitter """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - wfnd.inputs.x = 2 - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2(x=x)) + add2_2nd = workflow.add(Add2(x=add2_1st.out)) + return add2_2nd.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=2)) + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=2) - results = wf.result() - assert results.output.out == 6 - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == 6 def test_wfasnd_ndst_4(plugin, tmpdir): @@ -2439,24 +2243,24 @@ def test_wfasnd_ndst_4(plugin, tmpdir): workflow-node with two tasks, splitter for node """ - wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(Add2(name="add2_1st").split("x", x=wfnd.lzin.x)) - wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - wfnd.inputs.x = [2, 4] - wf = Workflow(name="wf", input_spec=["x"]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) - wf.cache_dir = tmpdir + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2().split(x=x)) + add2_2nd = workflow.add(Add2(x=add2_1st.out)) + return add2_2nd.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out - with Submitter(worker=plugin) as sub: - sub(wf) + wf = Workflow(x=[2, 4]) - results = wf.result() - assert results.output.out == [6, 8] - # checking the output directory - assert wf.output_dir.exists() + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out == [6, 8] def test_wfasnd_wfst_4(plugin, tmpdir): @@ -2464,20 +2268,24 @@ def test_wfasnd_wfst_4(plugin, tmpdir): workflow-node with two tasks, splitter for the main workflow """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(Add2(name="add2_1st", x=wfnd.lzin.x)) - wfnd.add(Add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) - wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) - wf.add(wfnd) - wf.split("x", x=[2, 4]) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2_1st = workflow.add(Add2(x=x)) + add2_2nd = workflow.add(Add2(x=add2_1st.out)) + return add2_2nd.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out + + wf = Workflow().split("x", x=[2, 4]) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = 
sub(wf) # assert wf.output_dir.exists() - results = wf.result() + assert results[0].output.out == 6 assert results[1].output.out == 8 # checking all directories @@ -2494,19 +2302,18 @@ def test_wf_nostate_cachedir(plugin, tmpdir): """wf with provided cache_dir using pytest tmpdir""" cache_dir = tmpdir.mkdir("test_wf_cache_1") - wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=2, y=3) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert 8 == results.outputs.out shutil.rmtree(cache_dir) @@ -2518,19 +2325,18 @@ def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): cache_dir = "test_wf_cache_2" tmpdir.mkdir(cache_dir) - wf = Workflow(name="wf_2", input_spec=["x", "y"], cache_dir=cache_dir) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.inputs.x = 2 - wf.inputs.y = 3 + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf = Workflow(x=2, y=3) - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert 8 == results.outputs.out shutil.rmtree(cache_dir) @@ -2552,7 +2358,7 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): wf1.inputs.y = 3 t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2572,7 +2378,7 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): wf2.inputs.y = 3 t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2609,7 +2415,7 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2630,7 +2436,7 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2669,7 +2475,7 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2692,7 +2498,7 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2730,7 +2536,7 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) 
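# A minimal sketch of the converted submission pattern that the surrounding cache-test
# hunks move towards (hedged: `SketchWorkflow`/`wf_sketch` are illustrative names only,
# and the sketch assumes the Multiply/Add2 task definitions, the `workflow` and
# `Submitter` names imported by this test module, and pytest's `plugin`/`tmpdir`
# fixtures, exactly as the neighbouring tests do):

@workflow.define
def SketchWorkflow(x, y):
    mult = workflow.add(Multiply(x=x, y=y))  # upstream node
    add2 = workflow.add(Add2(x=mult.out))    # consumes the lazy output of mult
    return add2.out                          # exposed as results.outputs.out

wf_sketch = SketchWorkflow(x=2, y=3)
with Submitter(worker=plugin, cache_dir=tmpdir) as sub:
    results = sub(wf_sketch)
assert results.outputs.out == 8              # (2 * 3) + 2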
t1 = time.time() - t0 @@ -2751,7 +2557,7 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2787,7 +2593,7 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2808,7 +2614,7 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2845,7 +2651,7 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2866,7 +2672,7 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -2903,7 +2709,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2925,7 +2731,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -2966,7 +2772,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -2989,7 +2795,7 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3030,7 +2836,7 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3054,7 +2860,7 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3091,7 +2897,7 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): wf1.inputs.x = 3 wf1.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) results1 = wf1.result() @@ -3109,7 +2915,7 @@ def test_wf_nostate_nodecachelocations(plugin, tmpdir): wf2.inputs.x = 2 wf2.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) results2 = wf2.result() @@ -3140,7 +2946,7 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): wf1.inputs.x = 3 wf1.plugin = plugin - with Submitter(worker=plugin) 
as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) results1 = wf1.result() @@ -3155,7 +2961,7 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): # updating cache_locations after adding the tasks wf2.cache_locations = cache_dir1 - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) results2 = wf2.result() @@ -3186,7 +2992,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3207,7 +3013,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3250,7 +3056,7 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3271,7 +3077,7 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -3315,7 +3121,7 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3337,7 +3143,7 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): wf2.mult.inputs.y = wf2.lzin.y t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3379,7 +3185,7 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): wf1.inputs.y = 3 wf1.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) results1 = wf1.result() @@ -3397,7 +3203,7 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) results2 = wf2.result() @@ -3431,7 +3237,7 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3453,7 +3259,7 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): t0 = time.time() # changing cache_locations to non-existing dir - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2, cache_locations=cache_dir1_empty) t2 = time.time() - t0 @@ -3489,7 +3295,7 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): wf1.inputs.y = 3 wf1.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) results1 = wf1.result() @@ -3509,7 +3315,7 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): wf2.inputs.y = 3 wf2.plugin = plugin - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) results2 = wf2.result() @@ -3544,7 +3350,7 @@ def test_wf_ndstate_cachelocations(plugin, 
tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3567,7 +3373,7 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3609,7 +3415,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3632,7 +3438,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2, rerun=True) t2 = time.time() - t0 @@ -3672,7 +3478,7 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3694,7 +3500,7 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3735,7 +3541,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3758,7 +3564,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf2.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf2) t2 = time.time() - t0 @@ -3796,7 +3602,7 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3810,7 +3616,7 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t2 = time.time() - t0 @@ -3841,7 +3647,7 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): wf1.plugin = plugin t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t1 = time.time() - t0 @@ -3857,7 +3663,7 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf1) t2 = time.time() - t0 @@ -3875,11 +3681,13 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): @pytest.fixture def create_tasks(): - wf = Workflow(name="wf", input_spec=["x"]) - wf.inputs.x = 1 - wf.add(Add2(name="t1", x=wf.lzin.x)) - wf.add(Multiply(name="t2", x=wf.t1.lzout.out, y=2)) - wf.set_output([("out", wf.t2.lzout.out)]) + @workflow.define + def Workflow(x): + t1 = workflow.add(Add2(x=x)) + t2 = workflow.add(Multiply(x=t1.out, y=2)) + return t2.out + + wf = Workflow(x=1) t1 = wf.name2obj["t1"] t2 = wf.name2obj["t2"] return wf, t1, t2 @@ -3907,7 +3715,7 @@ def test_cache_propagation2(tmpdir, create_tasks): def 
test_cache_propagation3(tmpdir, create_tasks): """Shared cache_dir with state""" wf, t1, t2 = create_tasks - wf.split("x", x=[1, 2]) + wf = Workflow().split("x", x=[1, 2]) wf.cache_dir = (tmpdir / "shared").strpath wf(plugin="cf") assert wf.cache_dir == t1.cache_dir == t2.cache_dir @@ -3955,20 +3763,19 @@ def test_wf_lzoutall_1(plugin, tmpdir): passing entire result object to add2_sub2_res function by using lzout.all syntax """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out", wf.add_sub.lzout.out_add)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) + return add_sub.out_add + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert 8 == results.output.out + assert 8 == results.outputs.out def test_wf_lzoutall_1a(plugin, tmpdir): @@ -3976,20 +3783,19 @@ def test_wf_lzoutall_1a(plugin, tmpdir): passing entire result object to add2_res function by using lzout.all syntax in the node connections and for wf output """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = 2 - wf.inputs.y = 3 - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) + return add_sub.all_ # out_all + + wf = Workflow(x=2, y=3) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == {"out_add": 8, "out_sub": 4} + assert results.outputs.out_all == {"out_add": 8, "out_sub": 4} def test_wf_lzoutall_st_1(plugin, tmpdir): @@ -3997,21 +3803,19 @@ def test_wf_lzoutall_st_1(plugin, tmpdir): passing entire result object to add2_res function by using lzout.all syntax """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) + return add_sub.out_add # out_add - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_add == [8, 62, 62, 602] + wf = Workflow(x=[2, 20], y=[3, 30]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out_add == [8, 62, 62, 602] def test_wf_lzoutall_st_1a(plugin, tmpdir): @@ -4019,21 +3823,19 @@ def test_wf_lzoutall_st_1a(plugin, tmpdir): passing entire result object to add2_res function by using lzout.all syntax """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, 
y=wf.lzin.y)) - wf.add(Add2Sub2Res(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == [ + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) + return add_sub.all_ # out_all + + wf = Workflow(x=[2, 20], y=[3, 30]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out_all == [ {"out_add": 8, "out_sub": 4}, {"out_add": 62, "out_sub": 58}, {"out_add": 62, "out_sub": 58}, @@ -4046,24 +3848,20 @@ def test_wf_lzoutall_st_2(plugin, tmpdir): passing entire result object to add2_res function by using lzout.all syntax """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add( - Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(Add2Sub2ResList(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) + add_sub = workflow.add(Add2Sub2ResList(res=mult.all_)) + return add_sub.out_add # out_add + + wf = Workflow(x=[2, 20], y=[3, 30]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_add[0] == [8, 62] - assert results.output.out_add[1] == [62, 602] + assert results.outputs.out_add[0] == [8, 62] + assert results.outputs.out_add[1] == [62, 602] @pytest.mark.xfail( @@ -4078,23 +3876,19 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): passing entire result object to add2_res function by using lzout.all syntax """ - wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add( - Multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") - ) - wf.add(Add2Sub2ResList(name="add_sub", res=wf.mult.lzout.all_)) - wf.set_output([("out_all", wf.add_sub.lzout.all_)]) - wf.inputs.x = [2, 20] - wf.inputs.y = [3, 30] - wf.plugin = plugin - wf.cache_dir = tmpdir - - with Submitter(worker=plugin) as sub: - sub(wf) - - assert wf.output_dir.exists() - results = wf.result() - assert results.output.out_all == [ + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) + add_sub = workflow.add(Add2Sub2ResList(res=mult.all_)) + return add_sub.all_ # out_all + + wf = Workflow(x=[2, 20], y=[3, 30]) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) + + assert results.outputs.out_all == [ {"out_add": [8, 62], "out_sub": [4, 58]}, {"out_add": [62, 602], "out_sub": [58, 598]}, ] @@ -4105,18 +3899,19 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): def test_wf_resultfile_1(plugin, tmpdir): """workflow with a file in the result, file should be copied to the wf dir""" - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunWriteFile(name="writefile", filename=wf.lzin.x)) - wf.inputs.x = "file_1.txt" - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(worker=plugin) as sub: - sub(wf) + 
@workflow.define + def Workflow(x): + writefile = workflow.add(FunWriteFile(filename=x)) + + return writefile.out # wf_out + + wf = Workflow(x="file_1.txt") + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # checking if the file exists and if it is in the Workflow directory - wf_out = results.output.wf_out.fspath + wf_out = results.outputs.wf_out.fspath wf_out.exists() assert wf_out == wf.output_dir / "file_1.txt" @@ -4125,19 +3920,20 @@ def test_wf_resultfile_2(plugin, tmpdir): """workflow with a list of files in the wf result, all files should be copied to the wf dir """ - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunWriteFileList(name="writefile", filename_list=wf.lzin.x)) - file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf.inputs.x = file_list - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + writefile = workflow.add(FunWriteFileList(filename_list=x)) + + return writefile.out # wf_out + + file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] + wf = Workflow(x=file_list) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # checking if the file exists and if it is in the Workflow directory - for ii, file in enumerate(results.output.wf_out): + for ii, file in enumerate(results.outputs.wf_out): assert file.fspath.exists() assert file.fspath == wf.output_dir / file_list[ii] @@ -4146,19 +3942,20 @@ def test_wf_resultfile_3(plugin, tmpdir): """workflow with a dictionaries of files in the wf result, all files should be copied to the wf dir """ - wf = Workflow(name="wf_file_1", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunWriteFileList2Dict(name="writefile", filename_list=wf.lzin.x)) - file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf.inputs.x = file_list - wf.plugin = plugin - wf.set_output([("wf_out", wf.writefile.lzout.out)]) - with Submitter(worker=plugin) as sub: - sub(wf) + @workflow.define + def Workflow(x): + writefile = workflow.add(FunWriteFileList2Dict(filename_list=x)) + + return writefile.out # wf_out + + file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] + wf = Workflow(x=file_list) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) - results = wf.result() # checking if the file exists and if it is in the Workflow directory - for key, val in results.output.wf_out.items(): + for key, val in results.outputs.wf_out.items(): if key == "random_int": assert val == 20 else: @@ -4169,16 +3966,19 @@ def test_wf_resultfile_3(plugin, tmpdir): def test_wf_upstream_error1(plugin, tmpdir): """workflow with two tasks, task2 dependent on an task1 which raised an error""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + return addvar2.out + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) 
assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4187,16 +3987,21 @@ def test_wf_upstream_error2(plugin, tmpdir): """task2 dependent on task1, task1 errors, workflow-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.split("x", x=[1, "hi"]) # workflow-level split TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + return addvar2.out + + wf = Workflow().split( + "x", x=[1, "hi"] + ) # workflow-level split TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4206,50 +4011,56 @@ def test_wf_upstream_error3(plugin, tmpdir): """task2 dependent on task1, task1 errors, task-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1")) - wf.inputs.x = [1, "hi"] # TypeError for adding str and int - wf.addvar1.split("a", a=wf.lzin.x) # task-level split - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType().split("a", a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + return addvar2.out + + wf = Workflow(x=[1, "hi"]) # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) def test_wf_upstream_error4(plugin, tmpdir): """workflow with one task, which raises an error""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.set_output([("out", wf.addvar1.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + return addvar1.out + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "raised an error" in str(excinfo.value) assert "addvar1" in str(excinfo.value) def test_wf_upstream_error5(plugin, tmpdir): """nested workflow with one task, which raises an error""" - wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) - wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.plugin = plugin - wf.set_output([("wf_out", wf.addvar1.lzout.out)]) - wf_main.add(wf) - wf_main.inputs.x = "hi" # TypeError for adding str and int - 
wf_main.set_output([("out", wf_main.wf.lzout.wf_out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + return addvar1.out # wf_out + + @workflow.define + def WfMain(x): + wf = workflow.add(Workflow(x=x)) + return wf.out + + wf_main = WfMain(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) @@ -4258,19 +4069,23 @@ def test_wf_upstream_error5(plugin, tmpdir): def test_wf_upstream_error6(plugin, tmpdir): """nested workflow with two tasks, the first one raises an error""" - wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) - wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.plugin = plugin - wf.set_output([("wf_out", wf.addvar2.lzout.out)]) - wf_main.add(wf) - wf_main.inputs.x = "hi" # TypeError for adding str and int - wf_main.set_output([("out", wf_main.wf.lzout.wf_out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + + return addvar2.out # wf_out + + @workflow.define + def WfMain(x): + wf = workflow.add(Workflow(x=x)) + return wf.out + + wf_main = WfMain(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin) as sub: + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) @@ -4282,17 +4097,20 @@ def test_wf_upstream_error7(plugin, tmpdir): workflow with three sequential tasks, the first task raises an error the last task is set as the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) - wf.set_output([("out", wf.addvar3.lzout.out)]) + + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + return addvar3.out + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4304,17 +4122,19 @@ def test_wf_upstream_error7a(plugin, tmpdir): workflow with three sequential tasks, the first task raises an error the second task is set as the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) - wf.set_output([("out", wf.addvar2.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = 
workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + return addvar2.out + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4326,17 +4146,19 @@ def test_wf_upstream_error7b(plugin, tmpdir): workflow with three sequential tasks, the first task raises an error the second and the third tasks are set as the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(FunAddVarDefaultNoType(name="addvar3", a=wf.addvar2.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addvar3.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + return addvar2.out, addvar3.out # (outputs=["out1", "out2"]) + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4345,17 +4167,19 @@ def test_wf_upstream_error7b(plugin, tmpdir): def test_wf_upstream_error8(plugin, tmpdir): """workflow with three tasks, the first one raises an error, so 2 others are removed""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = "hi" # TypeError for adding str and int - wf.plugin = plugin - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(FunAddTwo(name="addtwo", a=wf.addvar1.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addtwo.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addtwo = workflow.add(FunAddTwo(a=addvar1.out)) + return addvar2.out, addtwo.out # (outputs=["out1", "out2"]) + + wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4369,20 +4193,22 @@ def test_wf_upstream_error9(plugin, tmpdir): one branch has an error, the second is fine the errored branch is connected to the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(FunAddVarDefaultNoType(name="follow_err", a=wf.err.lzout.out)) - wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) - 
wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.follow_err.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out)) + + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) + return follow_err.out # out1 - wf.plugin = plugin + wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.err._errored is True @@ -4396,19 +4222,22 @@ def test_wf_upstream_error9a(plugin, tmpdir): the branch without error is connected to the workflow output so the workflow finished clean """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefault(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(FunAddVarDefault(name="follow_err", a=wf.err.lzout.out)) - wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(FunAddVarDefault(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.addvar2.lzout.out)]) # , ("out2", wf.addtwo.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefault(a=x)) - wf.plugin = plugin - with Submitter(worker=plugin) as sub: - sub(wf) + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) + follow_err = workflow.add(FunAddVarDefault(a=err.out)) + + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out)) + return addvar2.out # out1 # , ("out2", addtwo.out)]) + + wf = Workflow(x=2) + + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert wf.err._errored is True assert wf.follow_err._errored == ["err"] @@ -4419,20 +4248,22 @@ def test_wf_upstream_error9b(plugin, tmpdir): one branch has an error, the second is fine both branches are connected to the workflow output """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(FunAddVarDefaultNoType(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = 2 - wf.add(FunAddVarNoType(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(FunAddVarDefaultNoType(name="follow_err", a=wf.err.lzout.out)) - wf.add(FunAddTwoNoType(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(FunAddVarDefaultNoType(name="addvar2", a=wf.addtwo.lzout.out)) - wf.set_output([("out1", wf.follow_err.lzout.out), ("out2", wf.addtwo.lzout.out)]) + @workflow.define + def Workflow(x): + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out)) + + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) + return follow_err.out, addtwo.out # (outputs=["out1", "out2"]) - wf.plugin = plugin + wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin) as sub: - sub(wf) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + results = sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.err._errored is True @@ -4468,12 +4299,15 @@ def exporting_graphs(wf, 
name): @pytest.mark.parametrize("splitter", [None, "x"]) def test_graph_1(tmpdir, splitter): """creating a set of graphs, wf with two nodes""" - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult_1", x=wf.lzin.x, y=wf.lzin.y)) - wf.add(Multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(Add2(name="add2", x=wf.mult_1.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) - wf.split(splitter, x=[1, 2]) + + @workflow.define + def Workflow(x, y): + mult_1 = workflow.add(Multiply(x=x, y=y)) + mult_2 = workflow.add(Multiply(x=x, y=x)) + add2 = workflow.add(Add2(x=mult_1.out)) + return add2.out + + wf = Workflow().split(splitter, x=[1, 2]) # simple graph dotfile_s = wf.create_dotfile() @@ -4510,11 +4344,15 @@ def test_graph_1st(tmpdir): """creating a set of graphs, wf with two nodes some nodes have splitters, should be marked with blue color """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult_1", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(Multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) - wf.add(Add2(name="add2", x=wf.mult_1.lzout.out)) - wf.set_output([("out", wf.add2.lzout.out)]) + + @workflow.define + def Workflow(x, y): + mult_1 = workflow.add(Multiply(y=y).split("x", x=x)) + mult_2 = workflow.add(Multiply(x=x, y=x)) + add2 = workflow.add(Add2(x=mult_1.out)) + return add2.out + + wf = Workflow(x=[1, 2], y=2) # simple graph dotfile_s = wf.create_dotfile() @@ -4551,11 +4389,15 @@ def test_graph_1st_cmb(tmpdir): the first one has a splitter, the second has a combiner, so the third one is stateless first two nodes should be blue and the arrow between them should be blue """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wf.add(Add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) - wf.add(ListSum(name="sum", x=wf.add2.lzout.out)) - wf.set_output([("out", wf.sum.lzout.out)]) + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x)) + add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + sum = workflow.add(ListSum(x=add2.out)) + return sum.out + + wf = Workflow(x=[1, 2], y=2) # simple graph dotfile_s = wf.create_dotfile() dotstr_s_lines = dotfile_s.read_text().split("\n") @@ -4590,12 +4432,16 @@ def test_graph_1st_cmb(tmpdir): def test_graph_2(tmpdir): """creating a graph, wf with one workflow as a node""" - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x)) + return wfnd.out # simple graph dotfile_s = wf.create_dotfile() @@ -4624,12 +4470,18 @@ def test_graph_2st(tmpdir): """creating a set of graphs, wf with one workflow as a node the inner workflow has a state, so should be blue """ - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"]).split("x", x=wf.lzin.x) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + 
@workflow.define + def Workflow(x): + wfnd = workflow.add(Wfnd(x=x).split("x", x=x)) + return wfnd.out + + wf = Workflow(x=[1, 2]) # simple graph dotfile_s = wf.create_dotfile() @@ -4658,14 +4510,17 @@ def test_graph_2st(tmpdir): def test_graph_3(tmpdir): """creating a set of graphs, wf with two nodes (one node is a workflow)""" - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + wfnd = workflow.add(Wfnd(x=mult.out)) + return wfnd.out # simple graph dotfile_s = wf.create_dotfile() @@ -4700,14 +4555,17 @@ def test_graph_3st(tmpdir): the first node has a state and it should be passed to the second node (blue node and a wfasnd, and blue arrow from the node to the wfasnd) """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(Add2(name="add2", x=wfnd.lzin.x)) - wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + @workflow.define + def Wfnd(x): + add2 = workflow.add(Add2(x=x)) + return add2.out + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(y=y).split("x", x=x)) + wfnd = workflow.add(Wfnd(x=mult.out)) + return wfnd.out # simple graph dotfile_s = wf.create_dotfile() @@ -4741,14 +4599,20 @@ def test_graph_4(tmpdir): """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes inside). Connection from the node to the inner workflow. """ - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(Multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) - wfnd.add(Add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(Add2(name="add2_b", x=wfnd.add2_a.lzout.out)) - wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) - wf.add(wfnd) - wf.set_output([("out", wf.wfnd.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2_a = workflow.add(Add2(x=x)) + add2_b = workflow.add(Add2(x=add2_a.out)) + return add2_b.out + + @workflow.define + def Workflow(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + wfnd = workflow.add(Wfnd(x=mult.out)) + return wfnd.out + + wf = Workflow(x=2, y=3) # simple graph dotfile_s = wf.create_dotfile() @@ -4784,14 +4648,20 @@ def test_graph_5(tmpdir): """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes inside). Connection from the inner workflow to the node. 
""" - wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x) - wfnd.add(Add2(name="add2_a", x=wfnd.lzin.x)) - wfnd.add(Add2(name="add2_b", x=wfnd.add2_a.lzout.out)) - wfnd.set_output([("out", wfnd.add2_b.lzout.out)]) - wf.add(wfnd) - wf.add(Multiply(name="mult", x=wf.wfnd.lzout.out, y=wf.lzin.y)) - wf.set_output([("out", wf.mult.lzout.out)]) + + @workflow.define + def Wfnd(x): + add2_a = workflow.add(Add2(x=x)) + add2_b = workflow.add(Add2(x=add2_a.out)) + return add2_b.out + + @workflow.define + def Workflow(x, y): + wfnd = workflow.add(Wfnd(x=x)) + mult = workflow.add(Multiply(x=wfnd.out, y=y)) + return mult.out + + wf = Workflow(x=2, y=3) # simple graph dotfile_s = wf.create_dotfile() @@ -4834,15 +4704,17 @@ def test_duplicate_input_on_split_wf(tmpdir): def printer(a): return a - wf = Workflow(name="wf", input_spec=["text"], cache_dir=tmpdir) - wf.split(("text"), text=text) + @workflow.define + def Workflow(text): + + printer1 = workflow.add(printer(a=text)) - wf.add(printer(name="printer1", a=wf.lzin.text)) + return printer1.out # out1 - wf.set_output([("out1", wf.printer1.lzout.out)]) + wf = Workflow().split(("text"), text=text) with Submitter(worker="cf", n_procs=6) as sub: - sub(wf) + results = sub(wf) res = wf.result() @@ -4882,10 +4754,12 @@ def one_arg_inner(start_number): ) # Inner Workflow - test_inner = Workflow(name="test_inner", input_spec=["start_number1"]) - test_inner.add( - one_arg_inner(name="Ilevel1", start_number=test_inner.lzin.start_number1) - ) + @workflow.define + def Workflow(start_number1): + Ilevel1 = workflow.add( + one_arg_inner(start_number=test_inner.lzin.start_number1) + ) + test_inner.set_output([("res", test_inner.Ilevel1.lzout.out)]) # Outer workflow has two nodes plus the inner workflow @@ -4915,9 +4789,10 @@ def pass_odds(x): print(f"x%2 = {x % 2}\n") return x - wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(pass_odds(name="pass_odds").split("x", x=[1, 2, 3, 4, 5])) - wf.set_output([("out", wf.pass_odds.lzout.out)]) + @workflow.define + def Workflow(x): + pass_odds = workflow.add(pass_odds().split("x", x=[1, 2, 3, 4, 5])) + return pass_odds.out with pytest.raises(Exception): wf() @@ -4943,83 +4818,85 @@ def pass_odds(x): def test_wf_state_arrays(): - wf = Workflow( - name="test", - input_spec={"x": ty.List[int], "y": int}, - output_spec={"alpha": int, "beta": ty.List[int]}, - ) + @workflow.define(outputs={"alpha": int, "beta": ty.List[int]}) + def Workflow(x: ty.List[int], y: int): - wf.add( # Split over workflow input "x" on "scalar" input - ListMultSum( - in_list=wf.lzin.x, - name="A", - ).split(scalar=wf.lzin.x) - ) + A = workflow.add( # Split over workflow input "x" on "scalar" input + ListMultSum( + in_list=x, + ).split(scalar=x) + ) - wf.add( # Workflow is still split over "x", combined over "x" on out - ListMultSum( - name="B", - scalar=wf.A.lzout.sum, - in_list=wf.A.lzout.products, - ).combine("A.scalar") - ) + B = workflow.add( # Workflow is still split over "x", combined over "x" on out + ListMultSum( + scalar=A.sum, + in_list=A.products, + ).combine("A.scalar") + ) - wf.add( # Workflow " - ListMultSum( - name="C", - scalar=wf.lzin.y, - in_list=wf.B.lzout.sum, + C = workflow.add( # Workflow " + ListMultSum( + scalar=y, + in_list=B.sum, + ) ) - ) - wf.add( # Workflow is split again, this time over C.products - ListMultSum( - name="D", - in_list=wf.lzin.x, + D = workflow.add( # Workflow is split again, this time over C.products + ListMultSum( + 
in_list=x, + ) + .split(scalar=C.products) + .combine("scalar") ) - .split(scalar=wf.C.lzout.products) - .combine("scalar") - ) - wf.add( # Workflow is finally combined again into a single node - ListMultSum(name="E", scalar=wf.lzin.y, in_list=wf.D.lzout.sum) - ) + E = workflow.add( # Workflow is finally combined again into a single node + ListMultSum(scalar=y, in_list=D.sum) + ) - wf.set_output([("alpha", wf.E.lzout.sum), ("beta", wf.E.lzout.products)]) + return E.sum, E.products # (outputs=["alpha", "beta"]) results = wf(x=[1, 2, 3, 4], y=10) - assert results.output.alpha == 3000000 - assert results.output.beta == [100000, 400000, 900000, 1600000] + assert results.outputs.alpha == 3000000 + assert results.outputs.beta == [100000, 400000, 900000, 1600000] def test_wf_input_output_typing(): - wf = Workflow( - name="test", - input_spec={"x": int, "y": ty.List[int]}, - output_spec={"alpha": int, "beta": ty.List[int]}, - ) - with pytest.raises(TypeError) as exc_info: + @workflow.define(outputs={"alpha": int, "beta": ty.List[int]}) + def MismatchInputWf(x: int, y: ty.List[int]): ListMultSum( - scalar=wf.lzin.y, - in_list=wf.lzin.y, + scalar=y, + in_list=y, name="A", ) + + with pytest.raises(TypeError) as exc_info: + MismatchInputWf(x=1, y=[1, 2, 3]) exc_info_matches(exc_info, "Cannot coerce into ") - wf.add( # Split over workflow input "x" on "scalar" input - ListMultSum( - scalar=wf.lzin.x, - in_list=wf.lzin.y, - name="A", + @workflow.define(outputs={"alpha": int, "beta": ty.List[int]}) + def MismatchOutputWf(x: int, y: ty.List[int]): + A = workflow.add( # Split over workflow input "x" on "scalar" input + ListMultSum( + scalar=x, + in_list=y, + ) ) - ) + return A.products, A.products with pytest.raises(TypeError, match="don't match their declared types"): - wf.set_output( - [ - ("alpha", wf.A.lzout.products), - ] + MismatchOutputWf(x=1, y=[1, 2, 3]) + + @workflow.define(outputs={"alpha": int, "beta": ty.List[int]}) + def Workflow(x: int, y: ty.List[int]): + A = workflow.add( # Split over workflow input "x" on "scalar" input + ListMultSum( + scalar=x, + in_list=y, + ) ) + return A.sum, A.products - wf.set_output([("alpha", wf.A.lzout.sum), ("beta", wf.A.lzout.products)]) + outputs = Workflow(x=10, y=[1, 2, 3, 4])() + assert outputs.sum == 10 + assert outputs.products == [10, 20, 30, 40] From 9ba0e4295434a32067a7e32e16d3d9b448089636 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 19:50:20 +1100 Subject: [PATCH 301/342] cleaned up all linting errors and warnings in test_workflow --- pydra/engine/tests/test_workflow.py | 1770 ++++++++++++--------------- 1 file changed, 776 insertions(+), 994 deletions(-) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 89ca005fcc..1c6e0bc173 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1,5 +1,7 @@ import pytest -import shutil, os, sys +import shutil +import os +import sys import time import typing as ty import attr @@ -8,8 +10,8 @@ Add2, Add2Wait, Multiply, - MultiplyList, - MultiplyMixed, + # MultiplyList, + # MultiplyMixed, Power, Ten, Identity, @@ -35,7 +37,6 @@ ) from pydra.engine.submitter import Submitter from pydra.design import python, workflow -from pydra.engine.specs import ShellDef from pydra.utils import exc_info_matches @@ -44,13 +45,13 @@ def test_wf_no_output(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x)) + workflow.add(Add2(x=x)) wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: with 
Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "Workflow output cannot be None" in str(excinfo.value) @@ -64,11 +65,11 @@ def Workflow(x): wf = Workflow(x=2) - checksum_before = wf.checksum + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before assert 4 == results.outputs.out @@ -102,7 +103,7 @@ def Workflow(x): wf = Workflow(x=2) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - wf(submitter=sub) + results = sub(wf) assert 4 == results.outputs.out @@ -117,9 +118,9 @@ def Workflow(x): wf = Workflow(x=2) - wf(plugin=plugin) + outputs = wf(plugin=plugin) - assert 4 == results.outputs.out + assert 4 == outputs.out def test_wf_1_call_noplug_nosubm(plugin, tmpdir): @@ -134,7 +135,7 @@ def Workflow(x): outputs = wf() - assert 4 == results.outputs.out + assert 4 == outputs.out def test_wf_1_call_exception(plugin, tmpdir): @@ -311,7 +312,7 @@ def Workflow(x, y): with pytest.raises(TypeError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "unsupported" in str(excinfo.value) @@ -390,29 +391,16 @@ def Workflow(x, y): assert 1 == results.outputs.out_sub -def test_wf_5b_exception(tmpdir): - """set_output used twice with the same name - exception should be raised""" - - @workflow.define - def Workflow(x, y): - addsub = workflow.add(FunAddSubVar(a=x, b=y)) - return addsub.sum - - wf = Workflow(x=3, y=2) - with pytest.raises(Exception, match="are already set"): - return addsub.sub - - def test_wf_6(plugin, tmpdir): """wf with two tasks and two outputs connected to both tasks, one set_output """ - @workflow.define + @workflow.define(outputs=["out1", "out2"]) def Workflow(x, y): mult = workflow.add(Multiply(x=x, y=y)) add2 = workflow.add(Add2(x=mult.out)) - return mult.out, add2.out # (outputs=["out1", "out2"]) + return mult.out, add2.out # wf = Workflow(x=2, y=3) @@ -448,23 +436,21 @@ def test_wf_st_1(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out - checksum_before = wf.checksum + wf = Workflow(x=[1, 2]) + + checksum_before = wf._checksum with Submitter(cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 3 + assert results.outputs.out[1] == 4 def test_wf_st_1_call_subm(plugin, tmpdir): @@ -472,20 +458,18 @@ def test_wf_st_1_call_subm(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out + wf = Workflow(x=[1, 2]) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - wf(submitter=sub) + results = sub(wf) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 3 + assert results.outputs.out[1] == 4 def test_wf_st_1_call_plug(plugin, tmpdir): @@ -495,19 +479,17 @@ def 
test_wf_st_1_call_plug(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out - wf(plugin=plugin) + wf = Workflow(x=[1, 2]) + + outputs = wf(plugin=plugin) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 def test_wf_st_1_call_selfplug(plugin, tmpdir): @@ -517,18 +499,16 @@ def test_wf_st_1_call_selfplug(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out - outputs = wf()() + wf = Workflow(x=[1, 2]) + + outputs = wf() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): @@ -539,18 +519,16 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[1, 2])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out - outputs = wf()() + wf = Workflow(x=[1, 2]) + + outputs = wf() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 def test_wf_st_1_inp_in_call(tmpdir): @@ -563,8 +541,8 @@ def Workflow(x): wf = Workflow().split("x", x=[1, 2]) results = wf() - assert results[0].output.out == 3 - assert results[1].output.out == 4 + assert results.outputs.out[0] == 3 + assert results.outputs.out[1] == 4 def test_wf_st_1_upd_inp_call(tmpdir): @@ -577,8 +555,8 @@ def Workflow(x): wf = Workflow().split("x", x=[11, 22]) results = wf(x=[1, 2]) - assert results[0].output.out == 3 - assert results[1].output.out == 4 + assert results.outputs.out[0] == 3 + assert results.outputs.out[1] == 4 def test_wf_st_noinput_1(plugin, tmpdir): @@ -586,18 +564,18 @@ def test_wf_st_noinput_1(plugin, tmpdir): @workflow.define def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=[])) + add2 = workflow.add(Add2(x=x).split("x", x=x)) return add2.out - checksum_before = wf.checksum + wf = Workflow(x=[]) + + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before assert results == [] - # checking all directories - assert wf.output_dir == [] def test_wf_ndst_1(plugin, tmpdir): @@ -610,11 +588,11 @@ def Workflow(x): wf = Workflow(x=[1, 2]) - checksum_before = wf.checksum + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out == [3, 4] @@ -646,9 +624,11 @@ def test_wf_ndst_updatespl_1a(plugin, tmpdir): @workflow.define def Workflow(x): - task_add2 = workflow.add(Add2(name="add2", x=x).split("x", x=[1, 2])) + add2 = 
workflow.add(Add2().split("x", x=x)) return add2.out + wf = Workflow(x=[1, 2]) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -685,11 +665,11 @@ def Workflow(x): wf = Workflow(x=[]) - checksum_before = wf.checksum + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before assert results.outputs.out == [] @@ -709,12 +689,8 @@ def Workflow(x): results = sub(wf) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results[0].output.out == 3 - assert results[1].output.out == 4 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 3 + assert results.outputs.out[1] == 4 def test_wf_ndst_2(plugin, tmpdir): @@ -778,11 +754,6 @@ def Workflow(x, y): for i, res in enumerate(expected_ind): assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() - def test_wf_ndst_3(plugin, tmpdir): """Test workflow with 2 tasks, splitter on a task level""" @@ -819,12 +790,8 @@ def Workflow(x, y): # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results[0].output.out == 13 - assert results[1].output.out == 26 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 13 + assert results.outputs.out[1] == 26 def test_wf_ndst_4(plugin, tmpdir): @@ -854,21 +821,19 @@ def test_wf_st_5(plugin, tmpdir): @workflow.define def Workflow(x, y): mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=[1, 2], y=[11, 12])) + add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=x, y=y)) return add2.out + wf = Workflow(x=[1, 2], y=[11, 12]) + with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert results[0].output.out == 13 - assert results[1].output.out == 14 - assert results[2].output.out == 24 - assert results[3].output.out == 26 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 13 + assert results.outputs.out[1] == 14 + assert results.outputs.out[2] == 24 + assert results.outputs.out[3] == 26 def test_wf_ndst_5(plugin, tmpdir): @@ -906,16 +871,12 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert results[0][0].output.out == 13 - assert results[0][1].output.out == 24 - assert results[0][2].output.out == 35 - assert results[1][0].output.out == 14 - assert results[1][1].output.out == 26 - assert results[1][2].output.out == 38 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0][0] == 13 + assert results.outputs.out[0][1] == 24 + assert results.outputs.out[0][2] == 35 + assert results.outputs.out[1][0] == 14 + assert results.outputs.out[1][1] == 26 + assert results.outputs.out[1][2] == 38 def test_wf_ndst_6(plugin, tmpdir): @@ -958,9 +919,8 @@ def test_wf_ndst_8(plugin, tmpdir): @workflow.define def Workflow(x, y): - workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) iden = workflow.add(Identity(x=mult.out)) - return iden.out wf = Workflow(x=[1, 2, 
3], y=[11, 12]) @@ -1084,13 +1044,9 @@ def Workflow(x, y): results = sub(wf) assert len(results) == 6 - assert results[0].output.out == 39 - assert results[1].output.out == 42 - assert results[5].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 39 + assert results.outputs.out[1] == 42 + assert results.outputs.out[5] == 70 @pytest.mark.flaky(reruns=3) # when dask @@ -1133,16 +1089,12 @@ def Workflow(x, y): results = sub(wf) assert len(results) == 2 - assert results[0][0].output.out == 39 - assert results[0][1].output.out == 52 - assert results[0][2].output.out == 65 - assert results[1][0].output.out == 42 - assert results[1][1].output.out == 56 - assert results[1][2].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0][0] == 39 + assert results.outputs.out[0][1] == 52 + assert results.outputs.out[0][2] == 65 + assert results.outputs.out[1][0] == 42 + assert results.outputs.out[1][1] == 56 + assert results.outputs.out[1][2] == 70 def test_wf_3nd_ndst_2(plugin, tmpdir): @@ -1185,16 +1137,12 @@ def Workflow(x, y): results = sub(wf) assert len(results) == 3 - assert results[0][0].output.out == 39 - assert results[0][1].output.out == 42 - assert results[1][0].output.out == 52 - assert results[1][1].output.out == 56 - assert results[2][0].output.out == 65 - assert results[2][1].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0][0] == 39 + assert results.outputs.out[0][1] == 42 + assert results.outputs.out[1][0] == 52 + assert results.outputs.out[1][1] == 56 + assert results.outputs.out[2][0] == 65 + assert results.outputs.out[2][1] == 70 def test_wf_3nd_ndst_3(plugin, tmpdir): @@ -1238,16 +1186,12 @@ def Workflow(x, y): results = sub(wf) assert len(results) == 6 - assert results[0].output.out == 39 - assert results[1].output.out == 42 - assert results[2].output.out == 52 - assert results[3].output.out == 56 - assert results[4].output.out == 65 - assert results[5].output.out == 70 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 39 + assert results.outputs.out[1] == 42 + assert results.outputs.out[2] == 52 + assert results.outputs.out[3] == 56 + assert results.outputs.out[4] == 65 + assert results.outputs.out[5] == 70 def test_wf_3nd_ndst_4(plugin, tmpdir): @@ -1292,19 +1236,14 @@ def Workflow(x, y, z): results = sub(wf) assert len(results) == 4 - assert results[0][0].output.out == 27 - assert results[0][1].output.out == 28 - assert results[1][0].output.out == 117 - assert results[1][1].output.out == 118 - assert results[2][0].output.out == 28 - assert results[2][1].output.out == 29 - assert results[3][0].output.out == 118 - assert results[3][1].output.out == 119 - - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0][0] == 27 + assert results.outputs.out[0][1] == 28 + assert results.outputs.out[1][0] == 117 + assert results.outputs.out[1][1] == 118 + assert results.outputs.out[2][0] == 28 + assert results.outputs.out[2][1] == 29 + assert results.outputs.out[3][0] == 118 + assert results.outputs.out[3][1] == 119 def test_wf_3nd_ndst_5(plugin, tmpdir): @@ -1409,16 +1348,14 @@ def Workflow(zip): wf = Workflow(zip=[["test1", 
"test3", "test5"], ["test2", "test4", "test6"]]) with Submitter(worker="cf") as sub: - results = sub(wf) - - res = wf.result() + res = sub(wf) assert ( - res.output.out1 - == res.output.out1a + res.outputs.out1 + == res.outputs.out1a == [["test1", "test3", "test5"], ["test2", "test4", "test6"]] ) - assert res.output.out2 == res.output.out2a == ["Hoi", "Hoi"] + assert res.outputs.out2 == res.output.out2a == ["Hoi", "Hoi"] # workflows with Left and Right part in splitters A -> B (L&R parts of the splitter) @@ -1617,13 +1554,13 @@ def test_wf_ndstinner_3(plugin, tmpdir): the second task has two inputs and outer splitter that includes an inner field """ - @workflow.define + @workflow.define(outputs=["out_list", "out"]) def Workflow(x, y): list = workflow.add(ListOutput(x=x)) mult = workflow.add(Multiply().split(["x", "y"], x=list.out, y=y)) - return list.out + return list.out, mult.out - wf = Workflow(x=1, y=[10, 100]), mult.out # (outputs=["out_list", "out"]) + wf = Workflow(x=1, y=[10, 100]) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1641,14 +1578,14 @@ def test_wf_ndstinner_4(plugin, tmpdir): the third task has no its own splitter """ - @workflow.define + @workflow.define(outputs=["out_list", "out"]) def Workflow(x, y): list = workflow.add(ListOutput(x=x)) mult = workflow.add(Multiply(y=y).split("x", x=list.out)) add2 = workflow.add(Add2(x=mult.out)) - return list.out + return list.out, add2.out - wf = Workflow(x=1, y=10), add2.out # (outputs=["out_list", "out"]) + wf = Workflow(x=1, y=10) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1670,22 +1607,14 @@ def test_wf_ndstinner_5(plugin, tmpdir): the third task has no new splitter """ - @workflow.define + @workflow.define(outputs=["out_list", "out_mult", "out_add"]) def Workflow(x, y, b): list = workflow.add(ListOutput().split("x", x=x)) mult = workflow.add(Multiply().split(["y", "x"], x=list.out, y=y)) addvar = workflow.add(FunAddVar(a=mult.out).split("b", b=b)) + return list.out, mult.out, addvar.out - wf = Workflow(x=[1, 2], y=[10, 100]) - wf = Workflow(b=[3, 5]) - - wf.set_output( - [ - ("out_list", list.out), - ("out_mult", mult.out), - ("out_add", addvar.out), - ] - ) + wf = Workflow(x=[1, 2], y=[10, 100], b=[3, 5]) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1764,12 +1693,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert results[0].output.out == 13 - assert results[1].output.out == 24 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 13 + assert results.outputs.out[1] == 24 def test_wf_ndst_singl_1(plugin, tmpdir): @@ -1810,13 +1735,9 @@ def Workflow(x, y): results = sub(wf) assert len(results) == 3 - assert results[0].output.out == 39 - assert results[1].output.out == 52 - assert results[2].output.out == 65 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 39 + assert results.outputs.out[1] == 52 + assert results.outputs.out[2] == 65 def test_wf_ndst_singl_2(plugin, tmpdir): @@ -1885,11 +1806,11 @@ def Workflow(x): wf = Workflow(x=2) - checksum_before = wf.checksum + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before assert results.outputs.out == 4 @@ -1948,18 +1869,17 @@ 
def Workflow(x): assert results.outputs.out == 5 # adding another layer of workflow - wf_o = Workflow(name="wf_o", input_spec=["x"], x=4) - wf = Workflow(x=wf_o.lzin.x) - wf_o.add(wf) - wf_o.set_output([("out", wf_o.wf.lzout.out)]) - wf_o.cache_dir = tmpdir + @workflow.define + def WorkflowO(x): + wf = workflow.add(Workflow(x=3)) + return wf.out + + wf_o = WorkflowO(x=4) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf_o) + results = sub(wf_o) - results = wf_o.result() assert results.outputs.out == 6 - assert wf_o.output_dir.exists() def test_wfasnd_st_1(plugin, tmpdir): @@ -1975,14 +1895,16 @@ def Wfnd(x): @workflow.define def Workflow(x): - wfnd = workflow.add(Wfnd(x=x).split("x", x=[2, 4])) + wfnd = workflow.add(Wfnd(x=x).split(x=x)) return wfnd.out - checksum_before = wf.checksum + wf = Workflow(x=[2, 4]) + + checksum_before = wf._checksum with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) - assert wf.checksum == checksum_before + assert wf._checksum == checksum_before assert results.outputs.out == [4, 6] @@ -2042,7 +1964,7 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): """ @workflow.define - def Workflow(x): + def Wfnd(x): add2 = workflow.add(Add2().split("x", x=x)) return add2.out @@ -2081,12 +2003,8 @@ def Workflow(x): results = sub(wf) # assert wf.output_dir.exists() - assert results[0].output.out == 4 - assert results[1].output.out == 6 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 4 + assert results.outputs.out[1] == 6 # workflows with structures wf(A) -> B @@ -2141,12 +2059,8 @@ def Workflow(x, y): results = sub(wf) # assert wf.output_dir.exists() - assert results[0].output.out == 4 - assert results[1].output.out == 42 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 4 + assert results.outputs.out[1] == 42 # workflows with structures A -> wf(B) @@ -2203,12 +2117,8 @@ def Workflow(x, y): results = sub(wf) # assert wf.output_dir.exists() - assert results[0].output.out == 4 - assert results[1].output.out == 42 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 4 + assert results.outputs.out[1] == 42 # workflows with structures wfns(A->B) @@ -2286,12 +2196,8 @@ def Workflow(x): results = sub(wf) # assert wf.output_dir.exists() - assert results[0].output.out == 6 - assert results[1].output.out == 8 - # checking all directories - assert wf.output_dir - for odir in wf.output_dir: - assert odir.exists() + assert results.outputs.out[0] == 6 + assert results.outputs.out[1] == 8 # Testing caching @@ -2350,40 +2256,38 @@ def test_wf_nostate_cachelocations(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = 
time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # checking execution time (for unix and cf) # for win and dask/slurm the time for dir creation etc. might take much longer @@ -2406,42 +2310,38 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf2", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2451,8 +2351,7 @@ def test_wf_nostate_cachelocations_a(plugin, tmpdir): assert t2 < max(1, t1 - 1) # checking if both wf.output_dir are created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -2466,44 +2365,38 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - # additional output - wf2.set_output([("out_pr", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define("out_pr") + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out == results2.output.out_pr + assert 8 == results2.outputs.out == results2.outputs.out_pr # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2512,8 +2405,7 @@ def test_wf_nostate_cachelocations_b(plugin, tmpdir): assert t2 < max(1, t1 - 1) # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -2527,42 +2419,37 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out1", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define(outputs=["out1"]) + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out # out1 + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out1 + assert 8 == results1.outputs.out1 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out2", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define(outputs=["out2"]) + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out # out2 + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out2 + assert 8 == results2.outputs.out2 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2572,8 +2459,7 @@ def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): assert t2 < max(1, t1 - 1) # both wf output_dirs should be created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -2584,42 +2470,37 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf1", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out1", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define(outputs=["out1"]) + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out # out1 + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out1 + assert 8 == results1.outputs.out1 - wf2 = Workflow( - name="wf2", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out2", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define(outputs=["out2"]) + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out2 + assert 8 == results2.outputs.out2 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2628,8 +2509,7 @@ def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): assert t2 < max(1, t1 - 1) # both wf output_dirs should be created - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -2642,42 +2522,36 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(wf2, rerun=True) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2686,8 +2560,7 @@ def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): assert t2 > 2 # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -2700,47 +2573,40 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - rerun=True, # wh has to be rerun (default for propagate_rerun is True) - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2, rerun=True) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # checking if the second wf runs again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # everything has to be recomputed assert len(list(Path(cache_dir1).glob("F*"))) == 2 @@ -2763,48 +2629,40 @@ def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - 
rerun=True, # wh has to be rerun - propagate_rerun=False, # but rerun doesn't propagate to the tasks - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2, rerun=True, propagate_rerun=False) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # checking if the second wf runs again - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -2827,48 +2685,42 @@ def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpd cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - rerun=True, - propagate_rerun=False, # rerun will not be propagated to each task - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - # rerun on the task level needed (wf.propagate_rerun is False) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out, rerun=True)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + # rerun on the task level needed (wf.propagate_rerun is False) + add2 = workflow.add(Add2Wait(x=mult.out, rerun=True)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub( + wf2, rerun=True, propagate_rerun=False + ) # rerun will not be propagated to each task) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # the second task should be recomputed assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @@ -2890,40 +2742,37 @@ def 
test_wf_nostate_nodecachelocations(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(Ten(name="ten", x=wf1.lzin.x)) - wf1.add(Add2(name="add2", x=wf1.ten.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out)) + return add2.out + + wf1 = Workflow1(x=3) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) - results1 = wf1.result() - assert 12 == results1.output.out + assert 12 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Ten(name="ten", x=wf2.lzin.x)) - wf2.add(Add2(name="add2", x=wf2.ten.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out)) + return add2.out + + wf2 = Workflow2(x=2) - results2 = wf2.result() - assert 12 == results2.output.out + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) + + assert 12 == results2.outputs.out # checking if the second wf runs again, but runs only one task - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # the second wf should rerun one task assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @@ -2939,37 +2788,37 @@ def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x"], cache_dir=cache_dir1) - wf1.add(Ten(name="ten", x=wf1.lzin.x)) - wf1.add(Add2(name="add2", x=wf1.ten.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out)) + return add2.out - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + wf1 = Workflow1(x=3) + + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) - results1 = wf1.result() - assert 12 == results1.output.out + assert 12 == results1.outputs.out + + @workflow.define + def Workflow2(x, y): + ten = workflow.add(Ten(x=x)) + add2 = workflow.add(Add2(x=ten.out)) + return add2.out + + wf2 = Workflow2(x=2) - wf2 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir2) - wf2.add(Ten(name="ten", x=wf2.lzin.x)) - wf2.add(Add2(name="add2", x=wf2.ten.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.plugin = plugin # updating cache_locations after adding the tasks wf2.cache_locations = cache_dir1 - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(wf2) - results2 = wf2.result() - assert 12 == results2.output.out + assert 12 == results2.outputs.out # checking if the second wf runs again, but runs only one task - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert 
results1.output_dir != results2.output_dir # the second wf should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @@ -2984,42 +2833,40 @@ def test_wf_state_cachelocations(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3048,42 +2895,38 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(wf2, rerun=True) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3113,43 +2956,39 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1[0].output.out == 8 - assert results1[1].output.out == 82 + assert results1.outputs.out[0] == 8 + assert results1.outputs.out[1] == 82 - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin - wf2.mult.inputs.y = wf2.lzin.y + @workflow.define + def Workflow2(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3177,38 +3016,35 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + wf1 = Workflow1(x=2, y=3) - results1 = wf1.result() - assert results1.output.out == 8 + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - wf2.plugin = plugin + assert results1.outputs.out == 8 - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) - results2 = wf2.result() - assert results2[0].output.out == 8 - assert results2[1].output.out == 82 + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) + + assert results2.outputs.out[0] == 8 + assert results2.outputs.out[1] == 82 # checking the directory from the first wf assert wf1.output_dir.exists() @@ -3228,43 +3064,39 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): cache_dir1_empty = tmpdir.mkdir("test_wf_cache3_empty") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=2, y=3) t0 = time.time() # changing cache_locations to non-existing dir - with Submitter(worker=plugin, 
cache_dir=tmpdir) as sub: - sub(wf2, cache_locations=cache_dir1_empty) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1_empty + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert 8 == results2.output.out + assert 8 == results2.outputs.out # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3273,8 +3105,7 @@ def test_wf_nostate_cachelocations_updated(plugin, tmpdir): assert t2 > 2 # checking if both wf run - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) @@ -3287,43 +3118,38 @@ def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - # different argument assignment - wf2.add(Multiply(name="mult", x=wf2.lzin.y, y=wf2.lzin.x)) - wf2.add(Add2(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = 2 - wf2.inputs.y = 3 - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + # different argument assignment + mult = workflow.add(Multiply(x=y, y=x)) + add2 = workflow.add(Add2(x=mult.out)) + return add2.out - results2 = wf2.result() - assert 8 == results2.output.out + wf2 = Workflow2(x=2, y=3) + + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) + + assert 8 == results2.outputs.out # checking if both dir exists - assert wf1.output_dir.exists() - assert wf2.output_dir.exists() + assert results1.output_dir != results2.output_dir # the second wf should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 @@ -3339,46 +3165,38 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, 
cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3404,46 +3222,36 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2, rerun=True) + with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: + results2 = sub(wf2, rerun=True) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3467,45 +3275,38 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add(Multiply(name="mult")) - wf2.mult.split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 82] + assert results2.outputs.out == [8, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3530,46 +3331,38 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): cache_dir1 = tmpdir.mkdir("test_wf_cache3") cache_dir2 = tmpdir.mkdir("test_wf_cache4") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add( - Multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) - ) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert results1.output.out == [8, 82] + assert results1.outputs.out == [8, 82] - wf2 = Workflow( - name="wf", - input_spec=["x", "y"], - cache_dir=cache_dir2, - cache_locations=cache_dir1, - ) - wf2.add( - Multiply(name="mult").split(splitter=["x", "y"], x=wf2.lzin.x, y=wf2.lzin.y) - ) - wf2.add(Add2Wait(name="add2", x=wf2.mult.lzout.out)) - wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.plugin = plugin + @workflow.define + def Workflow2(x, y): + + mult = workflow.add(Multiply().split(splitter=["x", "y"], x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf2 = Workflow2(x=[2, 20], y=[3, 4]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf2) + with Submitter( + worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 + ) as sub: + results2 = sub(wf2) t2 = time.time() - t0 - results2 = wf2.result() - assert results2.output.out == [8, 10, 62, 82] + assert results2.outputs.out == [8, 10, 62, 82] # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3593,21 +3386,20 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): """ cache_dir1 = tmpdir.mkdir("test_wf_cache3") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = 2 - wf1.inputs.y = 3 - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1(x=2, y=3) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out # checkoing output_dir after the first run assert wf1.output_dir.exists() @@ -3616,12 +3408,11 @@ def test_wf_nostate_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t2 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1.output.out + assert 8 == results1.outputs.out # checking if no new directory is created assert cache_dir_content == os.listdir(wf1.cache_dir) @@ -3639,21 +3430,21 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): """ cache_dir1 = tmpdir.mkdir("test_wf_cache3") - wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) - wf1.add(Multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) - wf1.add(Add2Wait(name="add2", x=wf1.mult.lzout.out)) - wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) - wf1.plugin = plugin + @workflow.define + def Workflow1(x, y): + mult = workflow.add(Multiply(x=x, y=y)) + add2 = workflow.add(Add2Wait(x=mult.out)) + return add2.out + + wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t1 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1[0].output.out - assert 602 == results1[1].output.out + assert 8 == results1.outputs.out[0] + assert 602 == results1.outputs.out[1] # checkoing output_dir after the first run assert [odir.exists() for odir in wf1.output_dir] @@ -3663,13 +3454,12 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): # running workflow the second time t0 = time.time() - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf1) + with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: + results1 = sub(wf1) t2 = time.time() - t0 - results1 = wf1.result() - assert 8 == results1[0].output.out - assert 602 == results1[1].output.out + assert 8 == results1.outputs.out[0] + assert 602 == results1.outputs.out[1] # checking if no new directory is created assert cache_dir_content == os.listdir(wf1.cache_dir) # for win and dask/slurm the time for dir creation etc. 
might take much longer @@ -3715,25 +3505,21 @@ def test_cache_propagation2(tmpdir, create_tasks): def test_cache_propagation3(tmpdir, create_tasks): """Shared cache_dir with state""" wf, t1, t2 = create_tasks - wf = Workflow().split("x", x=[1, 2]) + wf = wf.split("x", x=[1, 2]) wf.cache_dir = (tmpdir / "shared").strpath wf(plugin="cf") assert wf.cache_dir == t1.cache_dir == t2.cache_dir def test_workflow_combine1(tmpdir): - wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add(Power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b)) - wf1.add(Identity(name="identity1", x=wf1.power.lzout.out).combine("power.a")) - wf1.add(Identity(name="identity2", x=wf1.identity1.lzout.out).combine("power.b")) - wf1.set_output( - { - "out_pow": wf1.power.lzout.out, - "out_iden1": wf1.identity1.lzout.out, - "out_iden2": wf1.identity2.lzout.out, - } - ) - wf1.cache_dir = tmpdir + @workflow.define(outputs=["out_pow", "out_iden1", "out_iden2"]) + def Workflow1(a, b): + power = workflow.add(Power().split(["a", "b"], a=a, b=b)) + identity1 = workflow.add(Identity(x=power.out).combine("power.a")) + identity2 = workflow.add(Identity(x=identity1.out).combine("power.b")) + return power.out, identity1.out, identity2.out + + wf1 = Workflow1(a=[1, 2], b=[2, 3]) outputs = wf1() assert outputs.out_pow == [1, 1, 4, 8] @@ -3742,26 +3528,26 @@ def test_workflow_combine1(tmpdir): def test_workflow_combine2(tmpdir): - wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add( - Power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b).combine("a") - ) - wf1.add(Identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) - wf1.set_output({"out_pow": wf1.power.lzout.out, "out_iden": wf1.identity.lzout.out}) - wf1.cache_dir = tmpdir - outputs = wf1() + @workflow.define(outputs=["out_pow", "out_iden"]) + def Workflow1(a, b): + power = workflow.add(Power().split(["a", "b"], a=a, b=b).combine("a")) + identity = workflow.add(Identity(x=power.out).combine("power.b")) + return power.out, identity.out + + wf1 = Workflow1(a=[1, 2], b=[2, 3]) + outputs = wf1(cache_dir=tmpdir) assert outputs.out_pow == [[1, 4], [1, 8]] assert outputs.out_iden == [[1, 4], [1, 8]] -# testing lzout.all to collect all of the results and let PythonTask deal with it +# g.all to collect all of the results and let PythonTask deal with it def test_wf_lzoutall_1(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_sub2_res function - by using lzout.all syntax + using.all syntax """ @workflow.define @@ -3781,7 +3567,7 @@ def Workflow(x, y): def test_wf_lzoutall_1a(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res function - by using lzout.all syntax in the node connections and for wf output + using.all syntax in the node connections and for wf output """ @workflow.define @@ -3801,7 +3587,7 @@ def Workflow(x, y): def test_wf_lzoutall_st_1(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res function - by using lzout.all syntax + using.all syntax """ @workflow.define @@ -3821,7 +3607,7 @@ def Workflow(x, y): def test_wf_lzoutall_st_1a(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res function - by using lzout.all syntax + using.all syntax """ @workflow.define @@ -3846,7 +3632,7 @@ def Workflow(x, y): def test_wf_lzoutall_st_2(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res 
function - by using lzout.all syntax + using.all syntax """ @workflow.define @@ -3874,7 +3660,7 @@ def Workflow(x, y): def test_wf_lzoutall_st_2a(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res function - by using lzout.all syntax + using.all syntax """ @workflow.define @@ -3978,7 +3764,7 @@ def Workflow(x): with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4001,7 +3787,7 @@ def Workflow(x): with pytest.raises(Exception) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4022,7 +3808,7 @@ def Workflow(x): wf = Workflow(x=[1, "hi"]) # TypeError for adding str and int with pytest.raises(Exception) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4039,7 +3825,7 @@ def Workflow(x): wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "raised an error" in str(excinfo.value) assert "addvar1" in str(excinfo.value) @@ -4110,7 +3896,7 @@ def Workflow(x): with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4129,12 +3915,12 @@ def Workflow(x): addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) - return addvar2.out + return addvar3.out wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4147,18 +3933,18 @@ def test_wf_upstream_error7b(plugin, tmpdir): the second and the third tasks are set as the workflow output """ - @workflow.define + @workflow.define(outputs=["out1", "out2"]) def Workflow(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) - return addvar2.out, addvar3.out # (outputs=["out1", "out2"]) + return addvar2.out, addvar3.out # wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.addvar1._errored is True @@ -4168,18 +3954,18 @@ def Workflow(x): def test_wf_upstream_error8(plugin, tmpdir): """workflow with three tasks, the first one raises an error, so 2 others are removed""" - @workflow.define + @workflow.define(outputs=["out1", "out2"]) def Workflow(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) addtwo = workflow.add(FunAddTwo(a=addvar1.out)) - return addvar2.out, addtwo.out # (outputs=["out1", "out2"]) + return addvar2.out, 
addtwo.out # wf = Workflow(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @@ -4202,13 +3988,13 @@ def Workflow(x): follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out)) addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) + workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) return follow_err.out # out1 wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.err._errored is True @@ -4228,7 +4014,7 @@ def Workflow(x): addvar1 = workflow.add(FunAddVarDefault(a=x)) err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) - follow_err = workflow.add(FunAddVarDefault(a=err.out)) + workflow.add(FunAddVarDefault(a=err.out)) addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out)) @@ -4237,7 +4023,7 @@ def Workflow(x): wf = Workflow(x=2) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert wf.err._errored is True assert wf.follow_err._errored == ["err"] @@ -4249,7 +4035,7 @@ def test_wf_upstream_error9b(plugin, tmpdir): both branches are connected to the workflow output """ - @workflow.define + @workflow.define(outputs=["out1", "out2"]) def Workflow(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) @@ -4258,12 +4044,12 @@ def Workflow(x): addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) - return follow_err.out, addtwo.out # (outputs=["out1", "out2"]) + return follow_err.out, addvar2.out wf = Workflow(x=2) with pytest.raises(ValueError) as excinfo: with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + sub(wf) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) assert wf.err._errored is True @@ -4302,9 +4088,9 @@ def test_graph_1(tmpdir, splitter): @workflow.define def Workflow(x, y): - mult_1 = workflow.add(Multiply(x=x, y=y)) - mult_2 = workflow.add(Multiply(x=x, y=x)) - add2 = workflow.add(Add2(x=mult_1.out)) + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") return add2.out wf = Workflow().split(splitter, x=[1, 2]) @@ -4347,9 +4133,9 @@ def test_graph_1st(tmpdir): @workflow.define def Workflow(x, y): - mult_1 = workflow.add(Multiply(y=y).split("x", x=x)) - mult_2 = workflow.add(Multiply(x=x, y=x)) - add2 = workflow.add(Add2(x=mult_1.out)) + mult_1 = workflow.add(Multiply(y=y).split("x", x=x), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") return add2.out wf = Workflow(x=[1, 2], y=2) @@ -4392,9 +4178,9 @@ def test_graph_1st_cmb(tmpdir): @workflow.define def Workflow(x, y): - mult = workflow.add(Multiply(y=y).split("x", x=x)) - add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) - sum = workflow.add(ListSum(x=add2.out)) + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") + sum = workflow.add(ListSum(x=add2.out), name="sum") return sum.out wf = 
Workflow(x=[1, 2], y=2) @@ -4435,14 +4221,16 @@ def test_graph_2(tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define def Workflow(x): - wfnd = workflow.add(Wfnd(x=x)) + wfnd = workflow.add(Wfnd(x=x), name="wfnd") return wfnd.out + wf = Workflow(x=2) + # simple graph dotfile_s = wf.create_dotfile() dotstr_s_lines = dotfile_s.read_text().split("\n") @@ -4473,12 +4261,12 @@ def test_graph_2st(tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define def Workflow(x): - wfnd = workflow.add(Wfnd(x=x).split("x", x=x)) + wfnd = workflow.add(Wfnd(x=x).split("x", x=x), name="wfnd") return wfnd.out wf = Workflow(x=[1, 2]) @@ -4513,15 +4301,17 @@ def test_graph_3(tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - wfnd = workflow.add(Wfnd(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") return wfnd.out + wf = Workflow(x=2) + # simple graph dotfile_s = wf.create_dotfile() dotstr_s_lines = dotfile_s.read_text().split("\n") @@ -4558,15 +4348,17 @@ def test_graph_3st(tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define def Workflow(x, y): - mult = workflow.add(Multiply(y=y).split("x", x=x)) - wfnd = workflow.add(Wfnd(x=mult.out)) + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") return wfnd.out + wf = Workflow(x=[1, 2], y=2) + # simple graph dotfile_s = wf.create_dotfile() dotstr_s_lines = dotfile_s.read_text().split("\n") @@ -4602,14 +4394,14 @@ def test_graph_4(tmpdir): @workflow.define def Wfnd(x): - add2_a = workflow.add(Add2(x=x)) - add2_b = workflow.add(Add2(x=add2_a.out)) + add2_a = workflow.add(Add2(x=x), name="add2_a") + add2_b = workflow.add(Add2(x=add2_a.out), name="add2_b") return add2_b.out @workflow.define def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - wfnd = workflow.add(Wfnd(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd") return wfnd.out wf = Workflow(x=2, y=3) @@ -4651,14 +4443,14 @@ def test_graph_5(tmpdir): @workflow.define def Wfnd(x): - add2_a = workflow.add(Add2(x=x)) - add2_b = workflow.add(Add2(x=add2_a.out)) + add2_a = workflow.add(Add2(x=x), name="add2_a") + add2_b = workflow.add(Add2(x=add2_a.out), name="add2_b") return add2_b.out @workflow.define def Workflow(x, y): - wfnd = workflow.add(Wfnd(x=x)) - mult = workflow.add(Multiply(x=wfnd.out, y=y)) + wfnd = workflow.add(Wfnd(x=x), name="wfnd") + mult = workflow.add(Multiply(x=wfnd.out, y=y), name="mult") return mult.out wf = Workflow(x=2, y=3) @@ -4706,19 +4498,15 @@ def printer(a): @workflow.define def Workflow(text): - printer1 = workflow.add(printer(a=text)) - return printer1.out # out1 - wf = Workflow().split(("text"), text=text) + wf = Workflow().split(text=text) with Submitter(worker="cf", n_procs=6) as sub: results = sub(wf) - res = wf.result() - - assert res[0].output.out1 == "test" and res[1].output.out1 == "test" + assert results.output.out1[0] == "test" and results.output.out1[0] == "test" @pytest.mark.timeout(40) @@ -4730,50 +4518,40 @@ def 
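[Editor's note] The graph-test hunks above all make the same two changes: nodes are added with an explicit name= argument to workflow.add, and the workflow instance is constructed before create_dotfile() is called on it. A condensed sketch of the nested case, assuming the Add2/Multiply tasks and the plain create_dotfile() call shown in these hunks:

    @workflow.define
    def Wfnd(x):
        add2 = workflow.add(Add2(x=x), name="add2")
        return add2.out

    @workflow.define
    def Workflow(x, y):
        mult = workflow.add(Multiply(x=x, y=y), name="mult")
        wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd")
        return wfnd.out

    wf = Workflow(x=2, y=3)
    dotfile_s = wf.create_dotfile()  # simple graph, as asserted in these tests
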
test_inner_outer_wf_duplicate(tmpdir): start_list = [3, 4] @python.define - def one_arg(start_number): + def OneArg(start_number): for k in range(10): start_number += 1 return start_number @python.define - def one_arg_inner(start_number): + def OneArgInner(start_number): for k in range(10): start_number += 1 return start_number - # Outer workflow - test_outer = Workflow( - name="test_outer", - input_spec=["start_number", "task_name", "dummy"], - cache_dir=tmpdir, - dummy=1, - ) - # Splitting on both arguments - test_outer.split( - ["start_number", "task_name"], start_number=start_list, task_name=task_list - ) - # Inner Workflow - @workflow.define - def Workflow(start_number1): - Ilevel1 = workflow.add( - one_arg_inner(start_number=test_inner.lzin.start_number1) - ) + @workflow.define(outputs=["res"]) + def InnerWf(start_number1): + inner_level1 = workflow.add(OneArgInner(start_number=start_number1)) + return inner_level1.out - test_inner.set_output([("res", test_inner.Ilevel1.lzout.out)]) + # Outer workflow has two nodes plus the inner workflow - # Outer workflow has two nodes plus the inner workflow - test_outer.add(one_arg(name="level1", start_number=test_outer.lzin.start_number)) - test_outer.add(test_inner) - test_inner.inputs.start_number1 = test_outer.level1.lzout.out + # Outer workflow + @workflow.define(outputs=["res2"]) + def OuterWf(start_number, task_name, dummy): + level1 = workflow.add(OneArg(start_number=start_number)) + inner = workflow.add(InnerWf(start_number1=level1.out)) + return inner.res - test_outer.set_output([("res2", test_outer.test_inner.lzout.res)]) + test_outer = OuterWf(dummy=1).split( + ["start_number", "task_name"], start_number=start_list, task_name=task_list + ) with Submitter(worker="cf") as sub: - sub(test_outer) + res = sub(test_outer) - res = test_outer.result() - assert res[0].output.res2 == 23 and res[1].output.res2 == 23 + assert res.output.res2[0] == 23 and res.output.res2[1] == 23 def test_rerun_errored(tmpdir, capfd): @@ -4781,7 +4559,7 @@ def test_rerun_errored(tmpdir, capfd): Only the errored tasks and workflow should be rerun""" @python.define - def pass_odds(x): + def PassOdds(x): if x % 2 == 0: print(f"x%2 = {x % 2} (error)\n") raise Exception("even error") @@ -4791,9 +4569,11 @@ def pass_odds(x): @workflow.define def Workflow(x): - pass_odds = workflow.add(pass_odds().split("x", x=[1, 2, 3, 4, 5])) + pass_odds = workflow.add(PassOdds().split("x", x=x)) return pass_odds.out + wf = Workflow(x=[1, 2, 3, 4, 5]) + with pytest.raises(Exception): wf() with pytest.raises(Exception): @@ -4853,9 +4633,11 @@ def Workflow(x: ty.List[int], y: int): ListMultSum(scalar=y, in_list=D.sum) ) - return E.sum, E.products # (outputs=["alpha", "beta"]) + return E.sum, E.products + + wf = Workflow(x=[1, 2, 3, 4], y=10) - results = wf(x=[1, 2, 3, 4], y=10) + results = wf() assert results.outputs.alpha == 3000000 assert results.outputs.beta == [100000, 400000, 900000, 1600000] From afe64e1bfeebc48dc9cabcc9a62779b4ed8a3f3e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 20:12:44 +1100 Subject: [PATCH 302/342] assert results not errored --- pydra/engine/tests/test_workflow.py | 274 ++++++++++++++++++++++++++++ 1 file changed, 274 insertions(+) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 1c6e0bc173..b72aa47178 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -69,6 +69,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) 
+ assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before assert 4 == results.outputs.out @@ -89,6 +91,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 4 == results.outputs.out @@ -105,6 +109,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 4 == results.outputs.out @@ -194,6 +200,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out @@ -213,6 +221,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out @@ -233,6 +243,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out @@ -252,6 +264,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking outputs from both nodes assert 6 == results.outputs.out_mult assert 8 == results.outputs.out_add2 @@ -273,6 +287,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking outputs from both nodes assert 6 == results.outputs.out_mult assert 8 == results.outputs.out_add2 @@ -293,6 +309,8 @@ def Workflow(x, y): with Submitter(worker=plugin_dask_opt) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 4 == results.outputs.out @@ -330,6 +348,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 5 == results.outputs.out @@ -350,6 +370,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 5 == results.outputs.out @@ -368,6 +390,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 5 == results.outputs.out_sum assert 1 == results.outputs.out_sub @@ -387,6 +411,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 5 == results.outputs.out_sum assert 1 == results.outputs.out_sub @@ -407,6 +433,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 6 == results.outputs.out1 assert 8 == results.outputs.out2 @@ -427,6 +455,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 6 == results.outputs.out1 assert 8 == results.outputs.out2 @@ -446,6 +476,8 @@ def 
Workflow(x): with Submitter(cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] @@ -467,6 +499,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out[0] == 3 assert results.outputs.out[1] == 4 @@ -573,6 +607,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before assert results == [] @@ -592,6 +628,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] @@ -613,6 +651,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out == [3, 4] @@ -632,6 +672,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out == [3, 4] @@ -652,6 +694,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [13, 14] @@ -669,6 +713,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before assert results.outputs.out == [] @@ -688,6 +734,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out[0] == 3 assert results.outputs.out[1] == 4 @@ -706,6 +754,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert results.outputs.out == [3, 4] @@ -728,6 +778,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + expected = [ ({"wfst_3.x": 1, "wfst_3.y": 11}, 13), ({"wfst_3.x": 2, "wfst_3.y": 12}, 26), @@ -769,6 +821,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26)] assert results.outputs.out == [13, 26] @@ -787,6 +841,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), 
({"test7.x": 2, "test.y": 12}, 26) # ] @@ -809,6 +865,8 @@ def Workflow(a, b): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] @@ -830,6 +888,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0] == 13 assert results.outputs.out[1] == 14 assert results.outputs.out[2] == 24 @@ -850,6 +910,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0] == 13 assert results.outputs.out[1] == 14 assert results.outputs.out[2] == 24 @@ -871,6 +933,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0][0] == 13 assert results.outputs.out[0][1] == 24 assert results.outputs.out[0][2] == 35 @@ -893,6 +957,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0] == [13, 24, 35] assert results.outputs.out[1] == [14, 26, 38] @@ -911,6 +977,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [11, 22, 33] @@ -928,6 +996,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0] == [11, 22, 33] assert results.outputs.out[1] == [12, 24, 36] @@ -946,6 +1016,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [11, 12, 22, 24, 33, 36] @@ -967,6 +1039,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # splitter from the first task should propagate to all tasks, # splitter_rpn should be the same in all tasks assert wf.mult.state.splitter == ["mult.x", "mult.y"] @@ -1004,6 +1078,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # splitter from the 1st task should propagate and the 2nd task should add one more # splitter_rpn for the 2nd and the 3rd task should be the same assert wf.add2_1st.state.splitter == "add2_1st.x" @@ -1043,6 +1119,8 @@ def Workflow(x, y): with Submitter(worker=plugin_dask_opt) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 6 assert results.outputs.out[0] == 39 assert results.outputs.out[1] == 42 @@ -1067,6 +1145,8 @@ def Workflow(x, y): with Submitter(worker=plugin_dask_opt) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results.outputs.out) == 6 assert results.outputs.out == [39, 42, 52, 56, 65, 70] @@ 
-1088,6 +1168,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 2 assert results.outputs.out[0][0] == 39 assert results.outputs.out[0][1] == 52 @@ -1114,6 +1196,8 @@ def Workflow(x, y): with Submitter(cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results.outputs.out) == 2 assert results.outputs.out[0] == [39, 52, 65] assert results.outputs.out[1] == [42, 56, 70] @@ -1136,6 +1220,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 3 assert results.outputs.out[0][0] == 39 assert results.outputs.out[0][1] == 42 @@ -1162,6 +1248,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results.outputs.out) == 3 assert results.outputs.out[0] == [39, 42] assert results.outputs.out[1] == [52, 56] @@ -1185,6 +1273,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 6 assert results.outputs.out[0] == 39 assert results.outputs.out[1] == 42 @@ -1212,6 +1302,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert len(results.outputs.out) == 6 @@ -1235,6 +1327,8 @@ def Workflow(x, y, z): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 4 assert results.outputs.out[0][0] == 27 assert results.outputs.out[0][1] == 28 @@ -1266,6 +1360,8 @@ def Workflow(x, y, z): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results.outputs.out) == 4 assert results.outputs.out[0] == [27, 28] assert results.outputs.out[1] == [117, 118] @@ -1296,6 +1392,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [39, 56] @@ -1318,6 +1416,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [9, 16] @@ -1378,6 +1478,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the splitter is created properly assert wf.mult.state.splitter == ["_add2", "mult.y"] assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] @@ -1404,6 +1506,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the splitter is created properly assert wf.mult.state.splitter == ["_add2", "mult.y"] assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] @@ -1430,6 +1534,8 @@ def 
Workflow(x, y, z): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the splitter is created properly assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] @@ -1475,6 +1581,8 @@ def Workflow(x, y, z): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the splitter is created properly assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] @@ -1519,6 +1627,8 @@ def Workflow(x: int): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf.add2.state.splitter == "add2.x" assert wf.add2.state.splitter_rpn == ["add2.x"] @@ -1542,6 +1652,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf.mult.state.splitter == "mult.x" assert wf.mult.state.splitter_rpn == ["mult.x"] @@ -1565,6 +1677,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf.mult.state.splitter == ["mult.x", "mult.y"] assert wf.mult.state.splitter_rpn == ["mult.x", "mult.y", "*"] @@ -1590,6 +1704,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf.mult.state.splitter == "mult.x" assert wf.mult.state.splitter_rpn == ["mult.x"] assert wf.add2.state.splitter == "_mult" @@ -1619,6 +1735,8 @@ def Workflow(x, y, b): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf.mult.state.splitter == ["_list", ["mult.y", "mult.x"]] assert wf.mult.state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] assert wf.addvar.state.splitter == ["_mult", "addvar.b"] @@ -1693,6 +1811,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out[0] == 13 assert results.outputs.out[1] == 24 @@ -1713,6 +1833,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [13, 24] @@ -1734,6 +1856,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results) == 3 assert results.outputs.out[0] == 39 assert results.outputs.out[1] == 52 @@ -1758,6 +1882,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert len(results.outputs.out) == 3 assert results.outputs.out == [39, 52, 65] @@ -1785,6 +1911,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, 
"\n".join(results.errors["error message"]) + assert results.outputs.out == 4 @@ -1810,6 +1938,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before assert results.outputs.out == 4 @@ -1836,6 +1966,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == 5 @@ -1866,6 +1998,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == 5 # adding another layer of workflow @@ -1879,6 +2013,8 @@ def WorkflowO(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf_o) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == 6 @@ -1904,6 +2040,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert wf._checksum == checksum_before assert results.outputs.out == [4, 6] @@ -1930,6 +2068,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [4, 6] @@ -1954,6 +2094,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [4, 6] @@ -1978,6 +2120,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [4, 6] @@ -2001,6 +2145,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out[0] == 4 @@ -2031,6 +2177,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out == [4, 42] @@ -2057,6 +2205,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out[0] == 4 @@ -2087,6 +2237,8 @@ def Workflow(x, y): with Submitter(cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out == [4, 42] @@ -2115,6 +2267,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out[0] == 4 @@ -2145,6 +2299,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == 6 @@ -2170,6 +2326,8 @@ def Workflow(x): with Submitter(worker=plugin, 
cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out == [6, 8] @@ -2194,6 +2352,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + + assert not results.errored, "\n".join(results.errors["error message"]) # assert wf.output_dir.exists() assert results.outputs.out[0] == 6 @@ -2219,6 +2379,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out shutil.rmtree(cache_dir) @@ -2242,6 +2404,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out shutil.rmtree(cache_dir) @@ -2267,6 +2431,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2285,6 +2451,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -2321,6 +2489,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2339,6 +2509,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -2376,6 +2548,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2394,6 +2568,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out == results2.outputs.out_pr @@ -2430,6 +2606,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out1 @@ -2447,6 +2625,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out2 @@ -2481,6 +2661,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out1 @@ -2498,6 +2680,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, 
"\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out2 @@ -2533,6 +2717,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2549,6 +2735,8 @@ def Workflow2(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: results2 = sub(wf2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -2584,6 +2772,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2601,6 +2791,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -2640,6 +2832,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2657,6 +2851,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2, rerun=True, propagate_rerun=False) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -2696,6 +2892,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -2753,6 +2951,8 @@ def Workflow1(x): with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + assert not results1.errored, "\n".join(results1.errors["error message"]) + assert 12 == results1.outputs.out @workflow.define @@ -2769,6 +2969,8 @@ def Workflow2(x, y): ) as sub: results2 = sub(wf2) + assert not results2.errored, "\n".join(results2.errors["error message"]) + assert 12 == results2.outputs.out # checking if the second wf runs again, but runs only one task @@ -2799,6 +3001,8 @@ def Workflow1(x): with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + assert not results1.errored, "\n".join(results1.errors["error message"]) + assert 12 == results1.outputs.out @workflow.define @@ -2815,6 +3019,8 @@ def Workflow2(x, y): with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: results2 = sub(wf2) + assert not results2.errored, "\n".join(results2.errors["error message"]) + assert 12 == results2.outputs.out # checking if the second wf runs again, but runs only one task @@ -2844,6 +3050,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out[0] == 8 @@ -2863,6 +3071,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, 
"\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out[0] == 8 @@ -2906,6 +3116,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out[0] == 8 @@ -2923,6 +3135,8 @@ def Workflow2(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: results2 = sub(wf2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out[0] == 8 @@ -2967,6 +3181,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out[0] == 8 @@ -2985,6 +3201,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out[0] == 8 @@ -3027,6 +3245,8 @@ def Workflow1(x, y): with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + assert not results1.errored, "\n".join(results1.errors["error message"]) + assert results1.outputs.out == 8 @workflow.define @@ -3043,6 +3263,8 @@ def Workflow2(x, y): ) as sub: results2 = sub(wf2) + assert not results2.errored, "\n".join(results2.errors["error message"]) + assert results2.outputs.out[0] == 8 assert results2.outputs.out[1] == 82 @@ -3075,6 +3297,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -3094,6 +3318,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1_empty ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out @@ -3129,6 +3355,8 @@ def Workflow1(x, y): with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + assert not results1.errored, "\n".join(results1.errors["error message"]) + assert 8 == results1.outputs.out @workflow.define @@ -3146,6 +3374,8 @@ def Workflow2(x, y): ) as sub: results2 = sub(wf2) + assert not results2.errored, "\n".join(results2.errors["error message"]) + assert 8 == results2.outputs.out # checking if both dir exists @@ -3176,6 +3406,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out == [8, 82] @@ -3194,6 +3426,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out == [8, 82] @@ -3233,6 +3467,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out == [8, 82] @@ -3249,6 +3485,8 @@ def 
Workflow2(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: results2 = sub(wf2, rerun=True) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out == [8, 82] @@ -3286,6 +3524,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out == [8, 82] @@ -3304,6 +3544,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out == [8, 82] @@ -3342,6 +3584,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert results1.outputs.out == [8, 82] @@ -3360,6 +3604,8 @@ def Workflow2(x, y): worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub(wf2) + + assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert results2.outputs.out == [8, 10, 62, 82] @@ -3397,6 +3643,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out @@ -3410,6 +3658,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t2 = time.time() - t0 assert 8 == results1.outputs.out @@ -3441,6 +3691,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out[0] @@ -3456,6 +3708,8 @@ def Workflow1(x, y): t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: results1 = sub(wf1) + + assert not results1.errored, "\n".join(results1.errors["error message"]) t2 = time.time() - t0 assert 8 == results1.outputs.out[0] @@ -3561,6 +3815,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == results.outputs.out @@ -3581,6 +3837,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out_all == {"out_add": 8, "out_sub": 4} @@ -3601,6 +3859,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out_add == [8, 62, 62, 602] @@ -3621,6 +3881,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out_all == [ {"out_add": 8, "out_sub": 4}, {"out_add": 62, "out_sub": 58}, @@ -3646,6 +3908,8 @@ def Workflow(x, y): with Submitter(worker=plugin, 
cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out_add[0] == [8, 62] assert results.outputs.out_add[1] == [62, 602] @@ -3674,6 +3938,8 @@ def Workflow(x, y): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.outputs.out_all == [ {"out_add": [8, 62], "out_sub": [4, 58]}, {"out_add": [62, 602], "out_sub": [58, 598]}, @@ -3696,6 +3962,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the file exists and if it is in the Workflow directory wf_out = results.outputs.wf_out.fspath wf_out.exists() @@ -3718,6 +3986,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the file exists and if it is in the Workflow directory for ii, file in enumerate(results.outputs.wf_out): assert file.fspath.exists() @@ -3740,6 +4010,8 @@ def Workflow(x): with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + # checking if the file exists and if it is in the Workflow directory for key, val in results.outputs.wf_out.items(): if key == "random_int": @@ -4506,6 +4778,8 @@ def Workflow(text): with Submitter(worker="cf", n_procs=6) as sub: results = sub(wf) + assert not results.errored, "\n".join(results.errors["error message"]) + assert results.output.out1[0] == "test" and results.output.out1[0] == "test" From cc8646ba159dc1b7721df9e73b3ee0268ded1ffb Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 21:10:05 +1100 Subject: [PATCH 303/342] added node names --- pydra/engine/tests/test_workflow.py | 153 +++++++++++++++------------- 1 file changed, 80 insertions(+), 73 deletions(-) diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index b72aa47178..ec7b13c569 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1030,8 +1030,8 @@ def test_wf_3sernd_ndst_1(plugin, tmpdir): @workflow.define def Workflow(x, y): mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) - add2_1st = workflow.add(Add2(x=mult.out)) - add2_2nd = workflow.add(Add2(x=add2_1st.out)) + add2_1st = workflow.add(Add2(x=mult.out), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") return add2_2nd.out wf = Workflow(x=[1, 2], y=[11, 12]) @@ -1068,9 +1068,9 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2_1st = workflow.add(Add2().split("x", x=x)) + add2_1st = workflow.add(Add2().split("x", x=x), name="add2_1st") mult = workflow.add(Multiply(x=add2_1st.out).split("y", y=y)) - add2_2nd = workflow.add(Add2(x=mult.out)) + add2_2nd = workflow.add(Add2(x=mult.out), name="add2_2nd") return add2_2nd.out wf = Workflow(x=[1, 2], y=[11, 12]) @@ -1108,8 +1108,8 @@ def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1135,8 +1135,8 @@ def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): 
@workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1158,8 +1158,8 @@ def test_wf_3nd_st_2(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1210,8 +1210,8 @@ def test_wf_3nd_st_3(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1238,8 +1238,8 @@ def test_wf_3nd_ndst_3(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2y.x")) return mult.out @@ -1263,8 +1263,8 @@ def test_wf_3nd_st_4(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1291,8 +1291,8 @@ def test_wf_3nd_ndst_4(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add( Multiply(x=add2x.out, y=add2y.out).combine(["add2x.x", "add2y.x"]) ) @@ -1317,8 +1317,8 @@ def test_wf_3nd_st_5(plugin, tmpdir): @workflow.define def Workflow(x, y, z): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") addvar = workflow.add(FunAddVar3(a=add2x.out, b=add2y.out, c=z)) return addvar.out @@ -1347,8 +1347,8 @@ def test_wf_3nd_ndst_5(plugin, tmpdir): @workflow.define def Workflow(x, y, z): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") addvar = workflow.add( FunAddVar3(a=add2x.out, b=add2y.out).split("c", c=z).combine("add2x.x") ) @@ -1378,8 +1378,8 @@ def test_wf_3nd_ndst_6(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add( Multiply(x=add2x.out, y=add2y.out) .split(("_add2x", "_add2y")) @@ -1404,8 +1404,8 @@ def test_wf_3nd_ndst_7(plugin, tmpdir): @workflow.define def Workflow(x): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=x)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + 
add2y = workflow.add(Add2().split("x", x=x), name="add2y") mult = workflow.add( Multiply(x=add2x.out, y=add2y.out).split(("_add2x", "_add2y")) ) @@ -1430,11 +1430,15 @@ def test_wf_3nd_8(tmpdir): @workflow.define(outputs=["out1", "out2", "out1a", "out2a"]) def Workflow(zip): - iden2flds_1 = workflow.add(Identity2Flds(x2="Hoi").split("x1", x1=zip)) + iden2flds_1 = workflow.add( + Identity2Flds(x2="Hoi").split("x1", x1=zip), name="iden2flds_1" + ) identity = workflow.add(Identity(x=iden2flds_1.out1)) - iden2flds_2 = workflow.add(Identity2Flds(x1=identity.out, x2=iden2flds_1.out2)) + iden2flds_2 = workflow.add( + Identity2Flds(x1=identity.out, x2=iden2flds_1.out2), name="iden2flds_2" + ) iden2flds_2a = workflow.add( Identity2Flds( @@ -1846,8 +1850,8 @@ def test_wf_st_singl_2(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2(x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2(x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -1872,8 +1876,8 @@ def test_wf_ndst_singl_2(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2(x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2(x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out @@ -2285,8 +2289,8 @@ def test_wfasnd_4(plugin, tmpdir): @workflow.define def Wfnd(x): - add2_1st = workflow.add(Add2(x=x)) - add2_2nd = workflow.add(Add2(x=add2_1st.out)) + add2_1st = workflow.add(Add2(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") return add2_2nd.out @workflow.define @@ -2312,8 +2316,8 @@ def test_wfasnd_ndst_4(plugin, tmpdir): @workflow.define def Wfnd(x): - add2_1st = workflow.add(Add2().split(x=x)) - add2_2nd = workflow.add(Add2(x=add2_1st.out)) + add2_1st = workflow.add(Add2().split(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") return add2_2nd.out @workflow.define @@ -2339,8 +2343,8 @@ def test_wfasnd_wfst_4(plugin, tmpdir): @workflow.define def Wfnd(x): - add2_1st = workflow.add(Add2(x=x)) - add2_2nd = workflow.add(Add2(x=add2_1st.out)) + add2_1st = workflow.add(Add2(x=x), name="add2_1st") + add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") return add2_2nd.out @workflow.define @@ -3769,8 +3773,12 @@ def test_workflow_combine1(tmpdir): @workflow.define(outputs=["out_pow", "out_iden1", "out_iden2"]) def Workflow1(a, b): power = workflow.add(Power().split(["a", "b"], a=a, b=b)) - identity1 = workflow.add(Identity(x=power.out).combine("power.a")) - identity2 = workflow.add(Identity(x=identity1.out).combine("power.b")) + identity1 = workflow.add( + Identity(x=power.out).combine("power.a"), name="identity1" + ) + identity2 = workflow.add( + Identity(x=identity1.out).combine("power.b"), name="identity2" + ) return power.out, identity1.out, identity2.out wf1 = Workflow1(a=[1, 2], b=[2, 3]) @@ -4027,9 +4035,8 @@ def test_wf_upstream_error1(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) - - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out wf = Workflow(x="hi") # TypeError for adding str and int @@ -4048,9 +4055,9 @@ def 
test_wf_upstream_error2(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out wf = Workflow().split( @@ -4072,9 +4079,9 @@ def test_wf_upstream_error3(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType().split("a", a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType().split("a", a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out wf = Workflow(x=[1, "hi"]) # TypeError for adding str and int @@ -4130,8 +4137,8 @@ def test_wf_upstream_error6(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out # wf_out @@ -4158,10 +4165,10 @@ def test_wf_upstream_error7(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) - addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar3.out wf = Workflow(x="hi") # TypeError for adding str and int @@ -4183,10 +4190,10 @@ def test_wf_upstream_error7a(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) - addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar3.out wf = Workflow(x="hi") # TypeError for adding str and int @@ -4207,10 +4214,10 @@ def test_wf_upstream_error7b(plugin, tmpdir): @workflow.define(outputs=["out1", "out2"]) def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) - addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") + addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar2.out, addvar3.out # wf = Workflow(x="hi") # TypeError for adding str and int @@ -4228,9 +4235,9 @@ def test_wf_upstream_error8(plugin, tmpdir): @workflow.define(outputs=["out1", "out2"]) def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out)) + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") addtwo = workflow.add(FunAddTwo(a=addvar1.out)) return addvar2.out, addtwo.out # @@ 
-4254,12 +4261,12 @@ def test_wf_upstream_error9(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) - follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out)) + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out), name="follow_err") - addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) return follow_err.out # out1 @@ -4283,13 +4290,13 @@ def test_wf_upstream_error9a(plugin, tmpdir): @workflow.define def Workflow(x): - addvar1 = workflow.add(FunAddVarDefault(a=x)) + addvar1 = workflow.add(FunAddVarDefault(a=x), name="addvar1") - err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") workflow.add(FunAddVarDefault(a=err.out)) - addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) - addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out)) + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") + addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out), name="addvar2") return addvar2.out # out1 # , ("out2", addtwo.out)]) wf = Workflow(x=2) @@ -4309,13 +4316,13 @@ def test_wf_upstream_error9b(plugin, tmpdir): @workflow.define(outputs=["out1", "out2"]) def Workflow(x): - addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) + addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") - err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi")) - follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out)) + err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") + follow_err = workflow.add(FunAddVarDefaultNoType(a=err.out), name="follow_err") - addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out)) - addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) + addtwo = workflow.add(FunAddTwoNoType(a=addvar1.out), name="addtwo") + addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out), name="addvar2") return follow_err.out, addvar2.out wf = Workflow(x=2) From bddb1f1cf5b8633e9adff44e14d59e08e1b5c23e Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 3 Mar 2025 21:43:09 +1100 Subject: [PATCH 304/342] debugging test_workflow --- pydra/engine/node.py | 20 ----------------- pydra/engine/submitter.py | 35 ++++++++++++++++++++++++----- pydra/engine/tests/test_workflow.py | 22 ++++++++++-------- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index f3639e0f81..29cf3aedb0 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -305,26 +305,6 @@ def _extract_input_el(self, inputs, inp_nm, ind): else: return getattr(inputs, inp_nm)[ind] - def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: - """Split the definition into the different states it will be run over""" - # TODO: doesn't work properly for more cmplicated wf (check if still an issue) - if not self.state: - return {None: self._definition} - split_defs = {} - for input_ind in self.state.inputs_ind: - inputs_dict = {} - for inp in set(self.input_names): - if f"{self.name}.{inp}" in input_ind: - inputs_dict[inp] = self._extract_input_el( - inputs=self._definition, - inp_nm=inp, - ind=input_ind[f"{self.name}.{inp}"], - ) - split_defs[StateIndex(input_ind)] = 
attrs.evolve( - self._definition, **inputs_dict - ) - return split_defs - # else: # # todo it never gets here # breakpoint() diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 364f9c3217..afbdbc3bfd 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -645,12 +645,9 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: name=self.node.name, ) else: - for index, split_defn in self.node._split_definition().items(): + for index, split_defn in self._split_definition().items(): yield Task( - definition=self._resolve_lazy_inputs( - task_def=split_defn, - state_index=index, - ), + definition=split_defn, submitter=self.submitter, environment=self.node._environment, name=self.node.name, @@ -686,6 +683,34 @@ def _resolve_lazy_inputs( ) return attrs.evolve(task_def, **resolved) + def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: + """Split the definition into the different states it will be run over""" + # TODO: doesn't work properly for more cmplicated wf (check if still an issue) + if not self.node.state: + return {None: self.node._definition} + split_defs = {} + for input_ind in self.node.state.inputs_ind: + inputs_dict = {} + for inp in set(self.node.input_names): + if f"{self.node.name}.{inp}" in input_ind: + value = getattr(self.node._definition, inp) + if isinstance(value, LazyField): + inputs_dict[inp] = value._get_value( + workflow=self.workflow, + graph=self.graph, + state_index=StateIndex(input_ind), + ) + else: + inputs_dict[inp] = self.node._extract_input_el( + inputs=self.node._definition, + inp_nm=inp, + ind=input_ind[f"{self.node.name}.{inp}"], + ) + split_defs[StateIndex(input_ind)] = attrs.evolve( + self.node._definition, **inputs_dict + ) + return split_defs + def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: """For a given node, check to see which tasks have been successfully run, are ready to run, can't be run due to upstream errors, or are blocked on other tasks to complete. 
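Aside on the submitter.py change above: the relocated _split_definition resolves each lazy input for a given state index and then rebuilds the task definition with attrs.evolve, producing one concrete definition per state. A minimal, self-contained sketch of that evolve-per-state pattern follows; AddDef and the index dicts are hypothetical stand-ins for illustration only, not part of Pydra's API.

    import attrs

    @attrs.define
    class AddDef:
        # hypothetical stand-in for a task definition's input fields
        x: int = 0
        y: int = 0

    base = AddDef(x=0, y=10)
    # hypothetical per-state input values, analogous to entries of state.inputs_ind
    states = [{"x": 1}, {"x": 2}, {"x": 3}]
    # one rebuilt definition per state, mirroring attrs.evolve(self.node._definition, **resolved)
    split_defs = [attrs.evolve(base, **s) for s in states]
    assert [d.x for d in split_defs] == [1, 2, 3]
    assert all(d.y == 10 for d in split_defs)  # unsplit inputs carry through unchanged

Each state thus gets its own copy of the definition in which split fields vary while unsplit fields are shared, which is what allows the lazy-field resolution above to be done per StateIndex.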
diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index ec7b13c569..f42b08db6d 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1163,7 +1163,7 @@ def Workflow(x, y): mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out - wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") + wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1186,8 +1186,8 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): @workflow.define def Workflow(x, y): - add2x = workflow.add(Add2().split("x", x=x)) - add2y = workflow.add(Add2().split("x", x=y)) + add2x = workflow.add(Add2().split("x", x=x), name="add2x") + add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2x.x")) return mult.out @@ -1215,7 +1215,7 @@ def Workflow(x, y): mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out - wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") + wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1245,7 +1245,7 @@ def Workflow(x, y): wf = Workflow(x=[1, 2, 3], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + with Submitter(worker="debug", cache_dir=tmpdir) as sub: results = sub(wf) assert not results.errored, "\n".join(results.errors["error message"]) @@ -1268,7 +1268,7 @@ def Workflow(x, y): mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out - wf = Workflow.split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) + wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1322,7 +1322,11 @@ def Workflow(x, y, z): addvar = workflow.add(FunAddVar3(a=add2x.out, b=add2y.out, c=z)) return addvar.out - wf = Workflow.split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") + wf = ( + Workflow() + .split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]) + .combine("y") + ) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) @@ -1828,7 +1832,7 @@ def test_wf_ndst_singl_1(plugin, tmpdir): @workflow.define def Workflow(x, y): - mult = workflow.add(Multiply(y=y).split("x", x=x)) + mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) return add2.out @@ -1855,7 +1859,7 @@ def Workflow(x, y): mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) return mult.out - wf = Workflow.split("x", x=[1, 2, 3], y=11) + wf = Workflow().split("x", x=[1, 2, 3], y=11) with Submitter(worker=plugin, cache_dir=tmpdir) as sub: results = sub(wf) From 75a29830280b8bd54b492154f3e36d598970066b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 4 Mar 2025 12:00:56 +1100 Subject: [PATCH 305/342] fixed check for missing fields --- pydra/engine/specs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 3f65e752e2..5163e0d9e0 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -509,7 +509,7 @@ def _check_rules(self): # Raise error if any required field is unset. 
if ( - value + (value is None or value is False) and field.requires and not any(rs.satisfied(self) for rs in field.requires) ): From 7b3aeb5058ccdbccb06486195c6bae82f748ccec Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 4 Mar 2025 12:03:18 +1100 Subject: [PATCH 306/342] fixed splitting of tasks over states where value comes from lazy field --- pydra/engine/core.py | 41 ------------- pydra/engine/helpers.py | 57 +++++++++++++++++- pydra/engine/node.py | 70 +---------------------- pydra/engine/state.py | 26 +++++++++ pydra/engine/submitter.py | 29 +++++----- pydra/engine/tests/test_numpy_examples.py | 3 +- pydra/engine/tests/test_workflow.py | 13 +++-- 7 files changed, 106 insertions(+), 133 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index e68f4959f4..5619966700 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -916,47 +916,6 @@ def _create_graph( ) return graph - def create_dotfile(self, type="simple", export=None, name=None, output_dir=None): - """creating a graph - dotfile and optionally exporting to other formats""" - outdir = output_dir if output_dir is not None else self.cache_dir - graph = self.graph - if not name: - name = f"graph_{self._node.name}" - if type == "simple": - for task in graph.nodes: - self.create_connections(task) - dotfile = graph.create_dotfile_simple(outdir=outdir, name=name) - elif type == "nested": - for task in graph.nodes: - self.create_connections(task) - dotfile = graph.create_dotfile_nested(outdir=outdir, name=name) - elif type == "detailed": - # create connections with detailed=True - for task in graph.nodes: - self.create_connections(task, detailed=True) - # adding wf outputs - for wf_out, lf in self._connections: - graph.add_edges_description( - (self._node.name, wf_out, lf._node.name, lf.field) - ) - dotfile = graph.create_dotfile_detailed(outdir=outdir, name=name) - else: - raise Exception( - f"type of the graph can be simple, detailed or nested, " - f"but {type} provided" - ) - if not export: - return dotfile - else: - if export is True: - export = ["png"] - elif isinstance(export, str): - export = [export] - formatted_dot = [] - for ext in export: - formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext)) - return dotfile, formatted_dot - def is_workflow(obj): """Check whether an object is a :class:`Workflow` instance.""" diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 4e0b3900e7..fb7eabcc60 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -18,7 +18,7 @@ from fileformats.core import FileSet if ty.TYPE_CHECKING: - from .specs import TaskDef, Result, WorkflowOutputs + from .specs import TaskDef, Result, WorkflowOutputs, WorkflowDef from .core import Task from pydra.design.base import Field @@ -28,6 +28,61 @@ DefType = ty.TypeVar("DefType", bound="TaskDef") +def plot_workflow( + workflow_task: "WorkflowDef", + out_dir: Path, + type="simple", + export=None, + name=None, + output_dir=None, +): + """creating a graph - dotfile and optionally exporting to other formats""" + from .core import Workflow + + # Create output directory + out_dir.mkdir(parents=True, exist_ok=True) + + # Construct the workflow object + wf = Workflow.construct(workflow_task) + graph = wf.graph + if not name: + name = f"graph_{wf._node.name}" + if type == "simple": + for task in graph.nodes: + wf.create_connections(task) + dotfile = graph.create_dotfile_simple(outdir=out_dir, name=name) + elif type == "nested": + for task in graph.nodes: + wf.create_connections(task) + dotfile = 
graph.create_dotfile_nested(outdir=out_dir, name=name) + elif type == "detailed": + # create connections with detailed=True + for task in graph.nodes: + wf.create_connections(task, detailed=True) + # adding wf outputs + for wf_out, lf in wf._connections: + graph.add_edges_description( + (wf._node.name, wf_out, lf._node.name, lf.field) + ) + dotfile = graph.create_dotfile_detailed(outdir=out_dir, name=name) + else: + raise Exception( + f"type of the graph can be simple, detailed or nested, " + f"but {type} provided" + ) + if not export: + return dotfile + else: + if export is True: + export = ["png"] + elif isinstance(export, str): + export = [export] + formatted_dot = [] + for ext in export: + formatted_dot.append(graph.export_graph(dotfile=dotfile, ext=ext)) + return dotfile, formatted_dot + + def attrs_fields(definition, exclude_names=()) -> list[attrs.Attribute]: """Get the fields of a definition, excluding some names.""" return [ diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 29cf3aedb0..661418d034 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -1,18 +1,15 @@ import typing as ty -from copy import deepcopy, copy +from copy import deepcopy from enum import Enum import attrs from pydra.utils.typing import TypeParser, StateArray from . import lazy from pydra.engine.helpers import ( - ensure_list, attrs_values, is_lazy, - create_checksum, ) -from pydra.utils.hash import hash_function from pydra.engine import helpers_state as hlpst -from pydra.engine.state import State, StateIndex +from pydra.engine.state import State if ty.TYPE_CHECKING: from .core import Workflow @@ -172,51 +169,6 @@ def combiner(self): return () return self._state.combiner - def _checksum_states(self, state_index: StateIndex = StateIndex()): - """ - Calculate a checksum for the specific state or all of the states of the task. - Replaces state-arrays in the inputs fields with a specific values for states. 
- Used to recreate names of the task directories, - - Parameters - ---------- - state_index : - TODO - - """ - # if is_workflow(self) and self._definition._graph_checksums is attr.NOTHING: - # self._definition._graph_checksums = { - # nd.name: nd.checksum for nd in self.graph_sorted - # } - from pydra.engine.specs import WorkflowDef - - if state_index: - inputs_copy = copy(self._definition) - for key, ind in self.state.inputs_ind[state_index].items(): - val = self._extract_input_el( - inputs=self._definition, inp_nm=key.split(".")[1], ind=ind - ) - setattr(inputs_copy, key.split(".")[1], val) - # setting files_hash again in case it was cleaned by setting specific element - # that might be important for outer splitter of input variable with big files - # the file can be changed with every single index even if there are only two files - input_hash = inputs_copy.hash - if isinstance(self._definition, WorkflowDef): - con_hash = hash_function(self._connections) - # TODO: hash list is not used - hash_list = [input_hash, con_hash] # noqa: F841 - checksum_ind = create_checksum( - self.__class__.__name__, self._checksum_wf(input_hash) - ) - else: - checksum_ind = create_checksum(self.__class__.__name__, input_hash) - return checksum_ind - else: - checksum_list = [] - for ind in range(len(self.state.inputs_ind)): - checksum_list.append(self._checksum_states(state_index=ind)) - return checksum_list - def _check_if_outputs_have_been_used(self, msg): used = [] if self._lzout: @@ -287,24 +239,6 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: upstream_states[node.name][1].append(inpt_name) return upstream_states - def _extract_input_el(self, inputs, inp_nm, ind): - """ - Extracting element of the inputs taking into account - container dimension of the specific element that can be set in self.state.cont_dim. - If input name is not in cont_dim, it is assumed that the input values has - a container dimension of 1, so only the most outer dim will be used for splitting. - If - """ - if f"{self.name}.{inp_nm}" in self.state.cont_dim: - return list( - hlpst.flatten( - ensure_list(getattr(inputs, inp_nm)), - max_depth=self.state.cont_dim[f"{self.name}.{inp_nm}"], - ) - )[ind] - else: - return getattr(inputs, inp_nm)[ind] - # else: # # todo it never gets here # breakpoint() diff --git a/pydra/engine/state.py b/pydra/engine/state.py index d078c1065f..201a5783b5 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -1253,3 +1253,29 @@ def _single_op_splits(self, op_single): val = op["*"](val_ind) keys = [op_single] return val, keys + + def _get_element(self, value: ty.Any, field_name: str, ind: int): + """ + Extracting element of the inputs taking into account + container dimension of the specific element that can be set in self.state.cont_dim. + If input name is not in cont_dim, it is assumed that the input values has + a container dimension of 1, so only the most outer dim will be used for splitting. 
+ + Parameters + ---------- + value : Any + inputs of the task + field_name : str + name of the input field + ind : int + index of the element + """ + if f"{self.name}.{field_name}" in self.cont_dim: + return list( + hlpst.flatten( + ensure_list(value), + max_depth=self.cont_dim[f"{self.name}.{field_name}"], + ) + )[ind] + else: + return value[ind] diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index afbdbc3bfd..6858fadf1b 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -690,24 +690,23 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: return {None: self.node._definition} split_defs = {} for input_ind in self.node.state.inputs_ind: - inputs_dict = {} + resolved = {} for inp in set(self.node.input_names): + value = getattr(self.node._definition, inp) + if isinstance(value, LazyField): + value = resolved[inp] = value._get_value( + workflow=self.workflow, + graph=self.graph, + state_index=StateIndex(input_ind), + ) if f"{self.node.name}.{inp}" in input_ind: - value = getattr(self.node._definition, inp) - if isinstance(value, LazyField): - inputs_dict[inp] = value._get_value( - workflow=self.workflow, - graph=self.graph, - state_index=StateIndex(input_ind), - ) - else: - inputs_dict[inp] = self.node._extract_input_el( - inputs=self.node._definition, - inp_nm=inp, - ind=input_ind[f"{self.node.name}.{inp}"], - ) + resolved[inp] = self.node.state._get_element( + value=value, + field_name=inp, + ind=input_ind[f"{self.node.name}.{inp}"], + ) split_defs[StateIndex(input_ind)] = attrs.evolve( - self.node._definition, **inputs_dict + self.node._definition, **resolved ) return split_defs diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index 08b3907081..e9acc379cd 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -81,8 +81,7 @@ def test_task_numpyinput_1(tmp_path: Path): nn = Identity().split(x=[np.array([1, 2]), np.array([3, 4])]) # checking the results outputs = nn(cache_dir=tmp_path) - assert (outputs.out[0] == np.array([1, 2])).all() - assert (outputs.out[1] == np.array([3, 4])).all() + assert (np.array(outputs.out) == np.array([[1, 2], [3, 4]])).all() def test_task_numpyinput_2(tmp_path: Path): diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index f42b08db6d..1ce9364199 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -37,6 +37,7 @@ ) from pydra.engine.submitter import Submitter from pydra.design import python, workflow +import pydra.engine.core from pydra.utils import exc_info_matches @@ -959,8 +960,7 @@ def Workflow(x, y): assert not results.errored, "\n".join(results.errors["error message"]) - assert results.outputs.out[0] == [13, 24, 35] - assert results.outputs.out[1] == [14, 26, 38] + assert results.outputs.out == [[13, 24, 35], [14, 26, 38]] def test_wf_ndst_7(plugin, tmpdir): @@ -3735,13 +3735,14 @@ def Workflow1(x, y): def create_tasks(): @workflow.define def Workflow(x): - t1 = workflow.add(Add2(x=x)) - t2 = workflow.add(Multiply(x=t1.out, y=2)) + t1 = workflow.add(Add2(x=x), name="t1") + t2 = workflow.add(Multiply(x=t1.out, y=2), name="t2") return t2.out wf = Workflow(x=1) - t1 = wf.name2obj["t1"] - t2 = wf.name2obj["t2"] + workflow_obj = pydra.engine.core.Workflow.construct(wf) + t1 = workflow_obj["t1"] + t2 = workflow_obj["t2"] return wf, t1, t2 From 9344e1ce9eb267a9de085005d55fc6972fe4db00 Mon Sep 17 00:00:00 2001 From: Tom 
Close Date: Tue, 4 Mar 2025 16:59:25 +1100 Subject: [PATCH 307/342] debugging hashing of function AST --- pydra/utils/hash.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index a16b84f63f..9dd8b20ecc 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -633,23 +633,26 @@ def dump_ast(node: ast.AST) -> bytes: def strip_annotations(node: ast.AST): """Remove annotations from function arguments.""" - for arg in node.args.args: - arg.annotation = None - for arg in node.args.kwonlyargs: - arg.annotation = None - if node.args.vararg: - node.args.vararg.annotation = None - if node.args.kwarg: - node.args.kwarg.annotation = None + if hasattr(node, "args"): + for arg in node.args.args: + arg.annotation = None + for arg in node.args.kwonlyargs: + arg.annotation = None + if node.args.vararg: + node.args.vararg.annotation = None + if node.args.kwarg: + node.args.kwarg.annotation = None indent = re.match(r"(\s*)", src).group(1) if indent: src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) func_ast = ast.parse(src).body[0] strip_annotations(func_ast) - yield dump_ast(func_ast.args) - for stmt in func_ast.body: - yield dump_ast(stmt) + if hasattr(func_ast, "args"): + yield dump_ast(func_ast.args) + if hasattr(func_ast, "body"): + for stmt in func_ast.body: + yield dump_ast(stmt) yield b")" From 8a29e1e91f7a45d3a41892359dd89b68f4047461 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 4 Mar 2025 16:59:48 +1100 Subject: [PATCH 308/342] debugging optional shell outputs --- pydra/design/base.py | 4 +- pydra/engine/helpers_file.py | 6 ++- pydra/engine/specs.py | 21 ++++++++-- pydra/engine/tests/test_shelltask.py | 58 +++++++++++++--------------- 4 files changed, 52 insertions(+), 37 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 0b237af33d..f4b4dcef3c 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -715,9 +715,9 @@ def make_converter( field_type, label=checker_label, superclass_auto_cast=True ) converters = [] - if field.type in (MultiInputObj, MultiInputFile): + if field_type in (MultiInputObj, MultiInputFile): converters.append(ensure_list) - elif field.type in (MultiOutputObj, MultiOutputFile): + elif field_type in (MultiOutputObj, MultiOutputFile): converters.append(from_list_if_single) if field.converter: converters.append(field.converter) diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 567fa3cc5e..0b19b075a5 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -334,7 +334,11 @@ def _single_template_formatting( formatted_value = _element_formatting( template, val_dict, file_template, keep_extension=field.keep_extension ) - return Path(formatted_value) if formatted_value is not None else formatted_value + if isinstance(formatted_value, list): + return [Path(val) for val in formatted_value] + elif isinstance(formatted_value, str): + return Path(formatted_value) + return None def _element_formatting( diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5163e0d9e0..0e21d874c7 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -32,7 +32,12 @@ from . import helpers_state as hlpst from . 
import lazy from pydra.utils.hash import hash_function, Cache -from pydra.utils.typing import StateArray, is_multi_input +from pydra.utils.typing import ( + StateArray, + is_multi_input, + MultiOutputObj, + MultiOutputFile, +) from pydra.design.base import Field, Arg, Out, RequirementSet, NO_DEFAULT from pydra.design import shell @@ -846,7 +851,17 @@ def _from_task(cls, task: "Task[ShellDef]") -> Self: else: resolved_value = cls._resolve_value(fld, task) # Set the resolved value - setattr(outputs, fld.name, resolved_value) + try: + setattr(outputs, fld.name, resolved_value) + except FileNotFoundError as e: + if is_optional(fld.type): + setattr(outputs, fld.name, None) + else: + e.add_note( + f"file system path provided to {fld.name!r}, {resolved_value}, " + f"does not exist, this is likely due to an error in the task {task}" + ) + raise return outputs @classmethod @@ -1129,7 +1144,7 @@ def _command_pos_args( # if False, nothing is added to the command. if value is True: cmd_add.append(field.argstr) - elif is_multi_input(tp): + elif is_multi_input(tp) or tp is MultiOutputObj or tp is MultiOutputFile: # if the field is MultiInputObj, it is used to create a list of arguments for val in value or []: cmd_add += self._format_arg(field, val) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 3e362a9a70..c8b5f52005 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -3062,7 +3062,7 @@ def no_fsl(): @pytest.mark.skipif(no_fsl(), reason="fsl is not installed") -def test_fsl(data_tests_dir): +def test_fsl(data_tests_dir, tmp_path): """mandatory field added to fields, value provided""" _xor_inputs = [ @@ -3232,7 +3232,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): input: File = shell.arg(argstr="", help="input file") class Outputs(ShellOutputs): - output: File = shell.outarg( + output: File | None = shell.outarg( argstr="", path_template="out.txt", help="dummy output", @@ -3252,7 +3252,7 @@ class Outputs(ShellOutputs): def test_shell_cmd_non_existing_outputs_1(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead""" + but return None instead""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): @@ -3264,25 +3264,24 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_1: File = shell.outarg( + out_1: File | None = shell.out( help="fictional output #1", - path_template="{out_name}_1.nii", + callable=lambda: "out_1.nii", ) - out_2: File = shell.outarg( + out_2: File | None = shell.out( help="fictional output #2", - path_template="{out_name}_2.nii", + callable=lambda: "out_2.nii", ) - shelly = Shelly( - out_name="test", - ) - outputs = shelly() - assert outputs.out_1 == attr.NOTHING and outputs.out_2 == attr.NOTHING + shelly = Shelly(out_name="test") + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_1 is None + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_2(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. This test has one existing and one non existing output file. + but return None instead. This test has one existing and one non existing output file. 
""" @shell.define @@ -3306,7 +3305,7 @@ class Outputs(ShellOutputs): ) shelly = Shelly(out_name="test") - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) # the first output file is created assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" assert outputs.out_1.fspath.exists() @@ -3316,7 +3315,8 @@ class Outputs(ShellOutputs): def test_shell_cmd_non_existing_outputs_3(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. This test has an existing mandatory output and another non existing output file. + but return None instead. This test has an existing mandatory output and another + non existing output file. """ @shell.define @@ -3331,55 +3331,51 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): out_1: File = shell.outarg( - help="fictional output #1", - path_template="{out_name}_1.nii", + help="real output #1", + default="{out_name}_1.nii", ) - out_2: File = shell.outarg( + out_2: File | None = shell.outarg( help="fictional output #2", - path_template="{out_name}_2.nii", + default="{out_name}_2.nii", ) shelly = Shelly(out_name="test") - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) # the first output file is created assert outputs.out_1.fspath == next(tmp_path.iterdir()) / "test_1.nii" assert outputs.out_1.fspath.exists() # the second output file is not created - assert outputs.out_2 == attr.NOTHING + assert outputs.out_2 is None def test_shell_cmd_non_existing_outputs_4(tmp_path): """Checking that non existing output files do not return a phantom path, - but return NOTHING instead. This test has an existing mandatory output and another non existing + but return None instead. This test has an existing mandatory output and another non existing mandatory output file.""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = "touch" out_name: str = shell.arg( - help=""" - base name of the pretend outputs. 
- """, + help="""base name of the pretend outputs.""", argstr="{out_name}_1.nii", ) class Outputs(ShellOutputs): out_1: File = shell.outarg( help="fictional output #1", - path_template="{out_name}_1.nii", + default="{out_name}_1.nii", ) out_2: File = shell.outarg( help="fictional output #2", - path_template="{out_name}_2.nii", + default="{out_name}_2.nii", ) - shelly = Shelly( - out_name="test", - ) + shelly = Shelly(out_name="test") # An exception should be raised because the second mandatory output does not exist with pytest.raises(Exception) as excinfo: - shelly() + shelly(cache_dir=tmp_path) assert "mandatory output for variable out_2 does not exist" == str(excinfo.value) # checking if the first output was created assert (next(tmp_path.iterdir()) / "test_1.nii").exists() From b33bbdf1a359db4cf2fc5f07342ddd30360a3456 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 5 Mar 2025 21:25:37 +1100 Subject: [PATCH 309/342] debugging shell task tests --- new_file_1.txt | 0 pydra/design/base.py | 26 ++- pydra/design/shell.py | 10 +- pydra/engine/environments.py | 4 +- pydra/engine/helpers.py | 10 +- pydra/engine/specs.py | 205 +++++++++++------- pydra/engine/tests/test_shelltask.py | 182 ++++++++-------- .../engine/tests/test_shelltask_inputspec.py | 84 +++---- pydra/utils/typing.py | 23 +- 9 files changed, 320 insertions(+), 224 deletions(-) create mode 100644 new_file_1.txt diff --git a/new_file_1.txt b/new_file_1.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pydra/design/base.py b/pydra/design/base.py index f4b4dcef3c..d25301d263 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -220,6 +220,14 @@ def requirements_satisfied(self, inputs: "TaskDef") -> bool: def mandatory(self): return self.default is NO_DEFAULT + @requires.validator + def _requires_validator(self, _, value): + if value and self.type not in (ty.Any, bool) and not is_optional(self.type): + raise ValueError( + f"Fields with requirements must be of boolean or optional type, " + f"not type {self.type} ({self!r})" + ) + @attrs.define(kw_only=True) class Arg(Field): @@ -262,8 +270,16 @@ class Arg(Field): copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single readonly: bool = False + @xor.validator + def _xor_validator(self, _, value): + if value and self.type not in (ty.Any, bool) and not is_optional(self.type): + raise ValueError( + f"Fields that have 'xor' must be of boolean or optional type, " + f"not type {self.type} ({self!r})" + ) -@attrs.define(kw_only=True) + +@attrs.define(kw_only=True, slots=False) class Out(Field): """Base class for output fields of task definitions @@ -473,10 +489,14 @@ def make_task_def( if getattr(arg, "path_template", False): if is_optional(arg.type): field_type = Path | bool | None - attrs_kwargs = {"default": None} + if arg.default is NO_DEFAULT: + attrs_kwargs["default"] = True if arg.requires else None + del attrs_kwargs["factory"] else: field_type = Path | bool - attrs_kwargs = {"default": True} # use the template by default + if arg.default is NO_DEFAULT: + attrs_kwargs["default"] = True # use the template by default + del attrs_kwargs["factory"] elif is_optional(arg.type): field_type = Path | None else: diff --git a/pydra/design/shell.py b/pydra/design/shell.py index ebdee524c9..7eb99d0508 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -127,7 +127,7 @@ def _validate_sep(self, _, sep): and issubclass(origin, ty.Sequence) and tp is not str ): - if sep is None: + if sep is None and not self.readonly: raise 
ValueError( f"A value to 'sep' must be provided when type is iterable {tp} " f"for field {self.name!r}" @@ -251,10 +251,10 @@ def _validate_path_template(self, attribute, value): f"path_template ({value!r}) can only be provided when type is a FileSet, " f"or union thereof, not {self.type!r}" ) - if self.argstr is None: - raise ValueError( - f"path_template ({value!r}) can only be provided when argstr is not None" - ) + # if self.argstr is None: + # raise ValueError( + # f"path_template ({value!r}) can only be provided when argstr is not None" + # ) @keep_extension.validator def _validate_keep_extension(self, attribute, value): diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 04f90d49e1..02efd902e4 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -179,7 +179,7 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_args + [docker_img] + task.definition._command_args( - root=self.root, input_updates=input_updates + root=self.root, value_updates=input_updates ), ) output = dict(zip(keys, values)) @@ -217,7 +217,7 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_args + [singularity_img] + task.definition._command_args( - root=self.root, input_updates=input_updates + root=self.root, value_updates=input_updates ), ) output = dict(zip(keys, values)) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index fb7eabcc60..088bfc6e4c 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -628,7 +628,7 @@ async def __aexit__(self, exc_type, exc_value, traceback): return None -def parse_format_string(fmtstr): +def parse_format_string(fmtstr: str) -> set[str]: """Parse a argstr format string and return all keywords used in it.""" identifier = r"[a-zA-Z_]\w*" attribute = rf"\.{identifier}" @@ -647,6 +647,14 @@ def parse_format_string(fmtstr): return set().union(*all_keywords) - {""} +def fields_in_formatter(formatter: str | ty.Callable[..., str]) -> set[str]: + """Extract all field names from a formatter string or function.""" + if isinstance(formatter, str): + return parse_format_string(formatter) + else: + return set(inspect.signature(formatter).parameters.keys()) + + def ensure_list(obj, tuple2list=False): """ Return a list whatever the input object is. diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 0e21d874c7..2c336547ec 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -18,7 +18,7 @@ import cloudpickle as cp from fileformats.generic import FileSet from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils.typing import TypeParser, is_optional, optional_type +from pydra.utils.typing import is_optional, optional_type from .helpers import ( attrs_fields, attrs_values, @@ -27,6 +27,7 @@ position_sort, ensure_list, parse_format_string, + fields_in_formatter, ) from .helpers_file import template_update, template_update_single from . 
import helpers_state as hlpst @@ -35,6 +36,7 @@ from pydra.utils.typing import ( StateArray, is_multi_input, + is_fileset_or_union, MultiOutputObj, MultiOutputFile, ) @@ -481,8 +483,8 @@ def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: } return hash_function(sorted(field_hashes.items())), field_hashes - def _check_rules(self): - """Check if all rules are satisfied.""" + def _rule_violations(self) -> list[str]: + """Check rules and returns a list of errors.""" field: Arg errors = [] @@ -492,14 +494,18 @@ def _check_rules(self): if is_lazy(value): continue - if value is attrs.NOTHING and not getattr(field, "path_template", False): + if ( + value is attrs.NOTHING + and not getattr(field, "path_template", False) + and not field.readonly + ): errors.append(f"Mandatory field {field.name!r} is not set") # Collect alternative fields associated with this field. if field.xor: mutually_exclusive = {name: self[name] for name in field.xor} are_set = [ - f"{n}={v!r}" for n, v in mutually_exclusive.items() if v is not None + f"{n}={v!r}" for n, v in mutually_exclusive.items() if is_set(v) ] if len(are_set) > 1: errors.append( @@ -514,7 +520,15 @@ def _check_rules(self): # Raise error if any required field is unset. if ( - (value is None or value is False) + not ( + value is None + or value is False + or ( + is_optional(field.type) + and is_fileset_or_union(field.type) + and value is True + ) + ) and field.requires and not any(rs.satisfied(self) for rs in field.requires) ): @@ -527,7 +541,14 @@ def _check_rules(self): errors.append( f"{field.name!r} requires{qualification} {[str(r) for r in field.requires]}" ) - if errors: + return errors + + def _check_rules(self): + """Check if all rules are satisfied.""" + + attrs.validate(self) + + if errors := self._rule_violations(): raise ValueError( f"Found the following errors in task {self} definition:\n" + "\n".join(errors) @@ -842,8 +863,8 @@ def _from_task(cls, task: "Task[ShellDef]") -> Self: resolved_value = task.return_values[fld.name] # Get the corresponding value from the inputs if it exists, which will be # passed through to the outputs, to permit manual overrides - elif isinstance(fld, shell.outarg) and is_set( - getattr(task.definition, fld.name) + elif isinstance(fld, shell.outarg) and isinstance( + task.inputs[fld.name], Path ): resolved_value = task.inputs[fld.name] elif is_set(fld.default): @@ -853,15 +874,15 @@ def _from_task(cls, task: "Task[ShellDef]") -> Self: # Set the resolved value try: setattr(outputs, fld.name, resolved_value) - except FileNotFoundError as e: + except FileNotFoundError: if is_optional(fld.type): setattr(outputs, fld.name, None) else: - e.add_note( - f"file system path provided to {fld.name!r}, {resolved_value}, " - f"does not exist, this is likely due to an error in the task {task}" + raise ValueError( + f"file system path(s) provided to mandatory field {fld.name!r}," + f"{resolved_value}, does not exist, this is likely due to an " + f"error in the {task.name!r} task" ) - raise return outputs @classmethod @@ -908,6 +929,8 @@ def _required_fields_satisfied(cls, fld: shell.out, inputs: "ShellDef") -> bool: # if a template is a function it has to be run first with the inputs as the only arg if callable(fld.path_template): template = fld.path_template(inputs) + else: + template = fld.path_template inp_fields = re.findall(r"{(\w+)(?:\:[^\}]+)?}", template) for req in requirements: req += inp_fields @@ -996,9 +1019,6 @@ def _run(self, task: "Task[ShellDef]", rerun: bool = True) -> None: def cmdline(self) -> 
str: """The equivalent command line that would be submitted if the task were run on the current working directory.""" - # checking the inputs fields before returning the command line - self._check_resolved() - self._check_rules() # Skip the executable, which can be a multi-part command, e.g. 'docker run'. cmd_args = self._command_args() cmdline = cmd_args[0] @@ -1015,45 +1035,52 @@ def cmdline(self) -> str: def _command_args( self, output_dir: Path | None = None, - input_updates: dict[str, ty.Any] | None = None, + value_updates: dict[str, ty.Any] | None = None, root: Path | None = None, ) -> list[str]: """Get command line arguments""" if output_dir is None: output_dir = Path.cwd() self._check_resolved() - inputs = attrs_values(self) - inputs.update(template_update(self, output_dir=output_dir)) - if input_updates: - inputs.update(input_updates) - pos_args = [] # list for (position, command arg) - positions_provided = [] + self._check_rules() + values = attrs_values(self) + template_values = template_update(self, output_dir=output_dir) + values.update(template_values) + if value_updates: + values.update(value_updates) + # Drop none/empty values and optional path fields that are set to false for field in list_fields(self): - name = field.name - value = inputs[name] - if value is None or is_multi_input(field.type) and value == []: - continue - if name == "executable": - pos_args.append(self._command_shelltask_executable(field, value)) - elif name == "additional_args": - continue - else: - pos_val = self._command_pos_args( - field=field, - value=value, - inputs=inputs, - root=root, - output_dir=output_dir, - positions_provided=positions_provided, - ) - if pos_val: - pos_args.append(pos_val) - + fld_value = values[field.name] + if fld_value is None or (is_multi_input(field.type) and fld_value == []): + del values[field.name] + if is_fileset_or_union(field.type) and type(fld_value) is bool: + assert field.path_template and field.name not in template_values + del values[field.name] + # Drop special fields that are added separately + del values["executable"] + del values["additional_args"] + # Add executable + pos_args = [ + self._command_shelltask_executable(field, self.executable), + ] # list for (position, command arg) + positions_provided = [0] + fields = {f.name: f for f in list_fields(self)} + for field_name in values: + pos_val = self._command_pos_args( + field=fields[field_name], + values=values, + root=root, + output_dir=output_dir, + positions_provided=positions_provided, + ) + if pos_val: + pos_args.append(pos_val) # Sort command and arguments by position cmd_args = position_sort(pos_args) # pos_args values are each a list of arguments, so concatenate lists after sorting command_args = sum(cmd_args, []) - command_args += inputs["additional_args"] + # Append additional arguments to the end of the command + command_args += self.additional_args return command_args def _command_shelltask_executable( @@ -1077,8 +1104,7 @@ def _command_shelltask_args( def _command_pos_args( self, field: shell.arg, - value: ty.Any, - inputs: dict[str, ty.Any], + values: dict[str, ty.Any], output_dir: Path, positions_provided: list[str], root: Path | None = None, @@ -1087,6 +1113,9 @@ def _command_pos_args( Checking all additional input fields, setting pos to None, if position not set. Creating a list with additional parts of the command that comes from the specific field. 
+ + Parameters + ---------- """ if field.argstr is None and field.formatter is None: # assuming that input that has no argstr is not used in the command, @@ -1105,11 +1134,12 @@ def _command_pos_args( positions_provided.append(field.position) + value = values[field.name] if value and isinstance(value, str): if root: # values from templates value = value.replace(str(output_dir), f"{root}{output_dir}") - if field.readonly and value is not None: + if field.readonly and type(value) is not bool and value is not attrs.NOTHING: raise Exception(f"{field.name} is read only, the value can't be provided") elif value is None and not field.readonly and field.formatter is None: return None @@ -1125,10 +1155,10 @@ def _command_pos_args( if argnm == "field": call_args_val[argnm] = value elif argnm == "inputs": - call_args_val[argnm] = inputs + call_args_val[argnm] = values else: - if argnm in inputs: - call_args_val[argnm] = inputs[argnm] + if argnm in values: + call_args_val[argnm] = values[argnm] else: raise AttributeError( f"arguments of the formatter function from {field.name} " @@ -1147,13 +1177,16 @@ def _command_pos_args( elif is_multi_input(tp) or tp is MultiOutputObj or tp is MultiOutputFile: # if the field is MultiInputObj, it is used to create a list of arguments for val in value or []: - cmd_add += self._format_arg(field, val) + split_values = copy(values) + split_values[field.name] = val + cmd_add += self._format_arg(field, split_values) else: - cmd_add += self._format_arg(field, value) + cmd_add += self._format_arg(field, values) return field.position, cmd_add - def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: + def _format_arg(self, field: shell.arg, values: dict[str, ty.Any]) -> list[str]: """Returning arguments used to specify the command args for a single inputs""" + value = values[field.name] if ( field.argstr.endswith("...") and isinstance(value, ty.Iterable) @@ -1164,11 +1197,9 @@ def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: if "{" in field.argstr and "}" in field.argstr: argstr_formatted_l = [] for val in value: - argstr_f = argstr_formatting( - field.argstr, - self, - value_updates={field.name: val}, - ) + split_values = copy(values) + split_values[field.name] = val + argstr_f = argstr_formatting(field.argstr, split_values) argstr_formatted_l.append(f" {argstr_f}") cmd_el_str = field.sep.join(argstr_formatted_l) else: # argstr has a simple form, e.g. "-f", or "--f" @@ -1182,7 +1213,7 @@ def _format_arg(self, field: shell.arg, value: ty.Any) -> list[str]: # if argstr has a more complex form, with "{input_field}" if "{" in field.argstr and "}" in field.argstr: cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) - cmd_el_str = argstr_formatting(cmd_el_str, self) + cmd_el_str = argstr_formatting(cmd_el_str, values) else: # argstr has a simple form, e.g. "-f", or "--f" if value: cmd_el_str = f"{field.argstr} {value}" @@ -1206,6 +1237,37 @@ def _generated_output_names(self, stdout: str, stderr: str): output_names.append(fld.name) return output_names + def _rule_violations(self) -> list[str]: + + errors = super()._rule_violations() + # if there is a value that has to be updated (e.g. single value from a list) + # getting all fields that should be formatted, i.e. {field_name}, ... 
+ fields = list_fields(self) + available_template_names = [f.name for f in fields] + ["field", "inputs"] + for field in fields: + if field.argstr: + if unrecognised := [ + f + for f in parse_format_string(field.argstr) + if f not in available_template_names + ]: + errors.append( + f"Unrecognised field names in the argstr of {field.name} " + f"({field.argstr}): {unrecognised}" + ) + if getattr(field, "path_template", None): + if unrecognised := [ + f + for f in fields_in_formatter(field.path_template) + if f not in available_template_names + ]: + errors.append( + f"Unrecognised field names in the path_template of {field.name} " + f"({field.path_template}): {unrecognised}" + ) + + return errors + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent @@ -1237,34 +1299,15 @@ def split_cmd(cmd: str | None): return cmd_args -def argstr_formatting( - argstr: str, inputs: TaskDef[OutputsType], value_updates: dict[str, ty.Any] = None -): +def argstr_formatting(argstr: str, values: dict[str, ty.Any]): """formatting argstr that have form {field_name}, using values from inputs and updating with value_update if provided """ # if there is a value that has to be updated (e.g. single value from a list) # getting all fields that should be formatted, i.e. {field_name}, ... - inputs_dict = attrs_values(inputs) - if value_updates: - inputs_dict.update(value_updates) inp_fields = parse_format_string(argstr) - val_dict = {} - for fld_name in inp_fields: - fld_value = inputs_dict[fld_name] - fld_attr = getattr(attrs.fields(type(inputs)), fld_name) - if fld_value is None or ( - fld_value is False - and fld_attr.type is not bool - and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) - ): - # if value is NOTHING, nothing should be added to the command - val_dict[fld_name] = "" - else: - val_dict[fld_name] = fld_value - # formatting string based on the val_dict - argstr_formatted = argstr.format(**val_dict) + argstr_formatted = argstr.format(**{n: values.get(n, "") for n in inp_fields}) # removing extra commas and spaces after removing the field that have NOTHING argstr_formatted = ( argstr_formatted.replace("[ ", "[") diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index c8b5f52005..5d7b99f407 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -429,7 +429,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd_exec) with pytest.raises(Exception) as excinfo: - shelly() + shelly(cache_dir=tmp_path) assert "mandatory" in str(excinfo.value) @@ -647,7 +647,7 @@ class Outputs(ShellOutputs): shelly = Shelly(opt_t=cmd_t, opt_S=cmd_S) with pytest.raises(Exception) as excinfo: - shelly() + shelly(cache_dir=tmp_path) assert "is mutually exclusive" in str(excinfo.value) @@ -712,7 +712,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) with pytest.raises(Exception) as excinfo: - shelly() + shelly(cache_dir=tmp_path) assert "requires" in str(excinfo.value) @@ -1302,7 +1302,7 @@ class Outputs(ShellOutputs): assert fspath.parent.parent == tmp_path -def test_shell_cmd_inputspec_with_iterable(): +def test_shell_cmd_inputspec_with_iterable(tmp_path): """Test formatting of argstr with different iterable types.""" @shell.define @@ -1492,7 +1492,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): assert outputs.stdout[1] == "hi\n" -def test_shell_cmd_inputspec_typeval_1(): +def test_shell_cmd_inputspec_typeval_1(tmp_path): """customized input_spec with a type that doesn't match the value - raise an 
exception """ @@ -1511,7 +1511,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): Shelly() -def test_shell_cmd_inputspec_typeval_2(): +def test_shell_cmd_inputspec_typeval_2(tmp_path): """customized input_spec (shorter syntax) with a type that doesn't match the value - raise an exception """ @@ -2188,7 +2188,7 @@ class Outputs(ShellOutputs): assert all([file.fspath.exists() for file in outputs.newfile]) -def test_shell_cmd_outputspec_5b_error(): +def test_shell_cmd_outputspec_5b_error(tmp_path): """ customised output_spec, adding files to the output, using a function to collect output, the function is saved in the field metadata @@ -2210,7 +2210,7 @@ class Outputs(ShellOutputs): shelly = Shelly() with pytest.raises(AttributeError, match="ble"): - shelly() + shelly(cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2274,7 +2274,7 @@ class Outputs(ShellOutputs): assert outputs.out1.fspath.exists() -def test_shell_cmd_outputspec_6a(): +def test_shell_cmd_outputspec_6a(tmp_path): """ providing output name by providing path_template (using shorter syntax) @@ -2296,7 +2296,7 @@ class Outputs(ShellOutputs): shelly = Shelly(additional_args=args) - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -2439,7 +2439,7 @@ class Outputs(ShellOutputs): assert outputs.stderr_field[index] == f"stderr: {outputs.stderr}" -def test_shell_cmd_outputspec_8b_error(): +def test_shell_cmd_outputspec_8b_error(tmp_path): """ customised output_spec, adding Int to the output, requiring a function to collect output @@ -2458,7 +2458,7 @@ class Outputs(ShellOutputs): shelly = Shelly().split("additional_args", args=args) with pytest.raises(Exception) as e: - shelly() + shelly(cache_dir=tmp_path) assert "has to have a callable" in str(e.value) @@ -2603,7 +2603,7 @@ def Workflow(cmd): assert res.outputs.newfile.fspath.parent.parent == tmp_path -def test_shell_cmd_inputspec_outputspec_1(): +def test_shell_cmd_inputspec_outputspec_1(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in templates """ @@ -2624,13 +2624,13 @@ class Outputs(ShellOutputs): file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") ) - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() assert outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_1a(): +def test_shell_cmd_inputspec_outputspec_1a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in templates, file2 is used in a template for newfile2, but it is not provided, so newfile2 is set to NOTHING @@ -2653,14 +2653,14 @@ class Outputs(ShellOutputs): ) shelly.file1 = File.mock("new_file_1.txt") - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() # newfile2 is not created, since file2 is not provided - assert outputs.newfile2 is attr.NOTHING + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_2(): +def test_shell_cmd_inputspec_outputspec_2(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ @@ -2695,13 +2695,13 @@ class Outputs(ShellOutputs): == ["newfile1", "newfile2", "return_code", "stderr", "stdout"] ) - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() assert 
outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_2a(): +def test_shell_cmd_inputspec_outputspec_2a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed """ @@ -2739,7 +2739,7 @@ class Outputs(ShellOutputs): "stdout", ] - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert shelly._generated_output_names(outputs.stdout, outputs.stderr) == [ "newfile1", "return_code", @@ -2749,10 +2749,10 @@ class Outputs(ShellOutputs): assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() - assert outputs.newfile2 is attr.NOTHING + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_3(): +def test_shell_cmd_inputspec_outputspec_3(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input that is not in the template, but in the requires field, @@ -2782,13 +2782,13 @@ class Outputs(ShellOutputs): shelly.file2 = File.mock("new_file_2.txt") shelly.additional_inp = 2 - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() assert outputs.newfile2.fspath.exists() -def test_shell_cmd_inputspec_outputspec_3a(): +def test_shell_cmd_inputspec_outputspec_3a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input that is not in the template, but in the requires field, @@ -2825,21 +2825,24 @@ class Outputs(ShellOutputs): "stderr", "stdout", ] - assert shelly._generated_output_names == [ + + outputs = shelly(cache_dir=tmp_path) + assert shelly._generated_output_names( + stdout=outputs.stdout, stderr=outputs.stderr + ) == [ "newfile1", "return_code", "stderr", "stdout", ] - outputs = shelly() assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() # additional input not provided so no newfile2 set (even if the file was created) - assert outputs.newfile2 is attr.NOTHING + assert outputs.newfile2 is None -def test_shell_cmd_inputspec_outputspec_4(): +def test_shell_cmd_inputspec_outputspec_4(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input to the requires together with a list of the allowed values, @@ -2850,11 +2853,11 @@ def test_shell_cmd_inputspec_outputspec_4(): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp: int = shell.arg(help="additional inp") + additional_inp: int | None = shell.arg(help="additional inp", default=None) class Outputs(ShellOutputs): - newfile1: File = shell.outarg( + newfile1: File | None = shell.outarg( path_template="{file1}", help="newfile 1", requires=["file1", ("additional_inp", [2, 3])], @@ -2867,7 +2870,7 @@ class Outputs(ShellOutputs): shelly.additional_inp = 2 # _generated_output_names should be the same as output_names - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert ( get_output_names(shelly) == shelly._generated_output_names(outputs.stdout, outputs.stderr) @@ -2878,7 +2881,7 @@ class Outputs(ShellOutputs): assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_4a(): +def test_shell_cmd_inputspec_outputspec_4a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input to the requires together with a list of the 
allowed values, @@ -2890,29 +2893,27 @@ def test_shell_cmd_inputspec_outputspec_4a(): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp: int = shell.arg(help="additional inp") + additional_inp: int | None = shell.arg(help="additional inp", default=None) class Outputs(ShellOutputs): - newfile1: File = shell.outarg( + newfile1: File | None = shell.outarg( path_template="{file1}", help="newfile 1", requires=["file1", ("additional_inp", [2, 3])], ) - shelly = Shelly( - executable=cmd, - ) + shelly = Shelly(executable=cmd) shelly.file1 = File.mock("new_file_1.txt") # the value is not in the list from requires shelly.additional_inp = 1 - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" - assert outputs.newfile1 is attr.NOTHING + assert outputs.newfile1 is None -def test_shell_cmd_inputspec_outputspec_5(): +def test_shell_cmd_inputspec_outputspec_5(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) @@ -2924,8 +2925,8 @@ def test_shell_cmd_inputspec_outputspec_5(): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp_A: int = shell.arg(help="additional inp A") - additional_inp_B: str = shell.arg(help="additional inp B") + additional_inp_A: int | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: str | None = shell.arg(help="additional inp B", default=None) class Outputs(ShellOutputs): @@ -2945,12 +2946,12 @@ class Outputs(ShellOutputs): shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp_A = 2 - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_5a(): +def test_shell_cmd_inputspec_outputspec_5a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) @@ -2962,8 +2963,8 @@ def test_shell_cmd_inputspec_outputspec_5a(): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp_A: str = shell.arg(help="additional inp A") - additional_inp_B: int = shell.arg(help="additional inp B") + additional_inp_A: str | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: int | None = shell.arg(help="additional inp B", default=None) class Outputs(ShellOutputs): @@ -2983,12 +2984,12 @@ class Outputs(ShellOutputs): shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp_B = 2 - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() -def test_shell_cmd_inputspec_outputspec_5b(): +def test_shell_cmd_inputspec_outputspec_5b(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires is a list of list so it is treated as OR list (i.e. el[0] OR el[1] OR...) 
@@ -3000,12 +3001,12 @@ def test_shell_cmd_inputspec_outputspec_5b(): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp_A: str = shell.arg(help="additional inp A") - additional_inp_B: str = shell.arg(help="additional inp B") + additional_inp_A: str | None = shell.arg(help="additional inp A", default=None) + additional_inp_B: str | None = shell.arg(help="additional inp B", default=None) class Outputs(ShellOutputs): - newfile1: File = shell.outarg( + newfile1: File | None = shell.outarg( path_template="{file1}", help="newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... @@ -3020,13 +3021,13 @@ class Outputs(ShellOutputs): ) shelly.file1 = File.mock("new_file_1.txt") - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" - # neither additional_inp_A nor additional_inp_B is set, so newfile1 is NOTHING - assert outputs.newfile1 is attr.NOTHING + # neither additional_inp_A nor additional_inp_B is set, so newfile1 is None + assert outputs.newfile1 is None -def test_shell_cmd_inputspec_outputspec_6_except(): +def test_shell_cmd_inputspec_outputspec_6_except(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires has invalid syntax - exception is raised @@ -3053,7 +3054,7 @@ class Outputs(ShellOutputs): shelly.file1 = File.mock("new_file_1.txt") with pytest.raises(Exception, match="requires field can be"): - shelly() + shelly(cache_dir=tmp_path) def no_fsl(): @@ -3326,17 +3327,17 @@ class Shelly(ShellDef["Shelly.Outputs"]): help=""" base name of the pretend outputs. """, - argstr="{out_name}_1.nii", + argstr=None, ) class Outputs(ShellOutputs): out_1: File = shell.outarg( help="real output #1", - default="{out_name}_1.nii", + path_template="{out_name}_1.nii", ) - out_2: File | None = shell.outarg( + out_2: File | None = shell.out( help="fictional output #2", - default="{out_name}_2.nii", + callable=lambda out_name: f"{out_name}_2.nii", ) shelly = Shelly(out_name="test") @@ -3363,20 +3364,22 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_1: File = shell.outarg( - help="fictional output #1", - default="{out_name}_1.nii", + out_1: File = shell.out( + help="real output #1", + callable=lambda out_name: f"{out_name}_1.nii", ) - out_2: File = shell.outarg( + out_2: File = shell.out( help="fictional output #2", - default="{out_name}_2.nii", + callable=lambda out_name: f"{out_name}_2.nii", ) shelly = Shelly(out_name="test") # An exception should be raised because the second mandatory output does not exist - with pytest.raises(Exception) as excinfo: + with pytest.raises( + ValueError, + match=r"file system path provided to mandatory field .* does not exist", + ): shelly(cache_dir=tmp_path) - assert "mandatory output for variable out_2 does not exist" == str(excinfo.value) # checking if the first output was created assert (next(tmp_path.iterdir()) / "test_1.nii").exists() @@ -3395,17 +3398,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_list: MultiOutputFile = shell.outarg( + out_list: MultiOutputFile | None = shell.out( help="fictional output #1", - path_template="{out_name}", + callable=lambda out_name: out_name, ) shelly = Shelly(out_name=["test_1.nii", "test_2.nii"]) - outputs = shelly() - # checking if the outputs are Nothing - assert outputs.out_list[0] == attr.NOTHING - assert outputs.out_list[1] == attr.NOTHING + # 
with pytest.raises(ValueError): + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_list == None def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): @@ -3416,33 +3418,24 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = "touch" out_name: MultiInputObj = shell.arg( - help=""" - base name of the pretend outputs. - """, + help="""base name of the pretend outputs.""", sep=" test_1_real.nii", # hacky way of creating an extra file with that name argstr="...", ) class Outputs(ShellOutputs): - out_list: MultiOutputFile = shell.outarg( + out_list: MultiOutputFile | None = shell.out( help="fictional output #1", - path_template="{out_name}_real.nii", + callable=lambda out_name: f"{out_name}_real.nii", ) shelly = Shelly(out_name=["test_1", "test_2"]) - outputs = shelly() + outputs = shelly(cache_dir=tmp_path) # checking if the outputs are Nothing - assert outputs.out_list[0] == File(next(tmp_path.iterdir()) / "test_1_real.nii") - assert outputs.out_list[1] == attr.NOTHING + assert outputs.out_list == [File(next(tmp_path.iterdir()) / "test_1_real.nii")] -@pytest.mark.xfail( - reason=( - "Not sure what the desired behaviour for formatter 5 is. Field is declared as a list " - "but a string containing the formatted arg is passed instead." - ) -) def test_shellspec_formatter_1(tmp_path): """test the input callable 'formatter'.""" @@ -3549,8 +3542,12 @@ class Shelly(ShellDef["Shelly.Outputs"]): """, # When providing a formatter all other metadata options are discarded. formatter=formatter_1, + sep=" ", ) + class Outputs(ShellOutputs): + pass + in1 = ["in11", "in12"] shelly = Shelly(in2="in2").split("in1", in1=in1) assert shelly is not None @@ -3588,20 +3585,23 @@ def test_shellcommand_error_msg(tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - executable = script_path + executable = str(script_path) in1: str = shell.arg(help="a dummy string", argstr="") + class Outputs(ShellOutputs): + pass + shelly = Shelly(in1="hello") with pytest.raises(RuntimeError) as excinfo: - shelly() + shelly(cache_dir=tmp_path) path_str = str(script_path) assert ( str(excinfo.value) - == f"""Error running 'err_msg' task with ['{path_str}', 'hello']: + == f"""Error running 'main' task with ['{path_str}', 'hello']: stderr: {path_str}: line 3: /command-that-doesnt-exist: No such file or directory diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 3972cbc3d6..af871b6269 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -581,10 +581,12 @@ class Outputs(ShellOutputs): outA: File = shell.outarg( help="outA", + argstr=None, path_template="{inpA}_out", ) outB: File = shell.outarg( help="outB", + argstr=None, path_template="{inpB}_out", ) @@ -615,11 +617,11 @@ class Outputs(ShellOutputs): ) # checking if outA and outB in the output fields (outAB should not be) assert get_output_names(shelly) == [ - "return_code", - "stdout", - "stderr", "outA", "outB", + "return_code", + "stderr", + "stdout", ] @@ -634,10 +636,12 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): outA: File = shell.outarg( + argstr=None, help="outA", path_template="{inpA}_out", ) outB: File = shell.outarg( + argstr=None, help="outB", path_template="{inpB}_out", ) @@ -669,11 +673,11 @@ class Outputs(ShellOutputs): ) # checking if outA and outB in the output fields (outAB should not be) assert get_output_names(shelly) == [ - 
"return_code", - "stdout", - "stderr", "outA", "outB", + "return_code", + "stderr", + "stdout", ] @@ -688,10 +692,12 @@ def test_shell_cmd_inputs_template_4(): class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): outA: File = shell.outarg( + argstr=None, help="outA", path_template="{inpA}_out", ) - outB: str = shell.arg( + outB: File | None = shell.outarg( + argstr=None, help="outB", path_template="{inpB}_out", ) @@ -703,7 +709,7 @@ class Outputs(ShellOutputs): help="inpA", argstr="", ) - inpB: str = shell.arg(position=2, help="inpB", argstr="") + inpB: str | None = shell.arg(position=2, help="inpB", argstr="", default=None) outAB: str = shell.arg( position=-1, help="outAB", @@ -715,11 +721,11 @@ class Outputs(ShellOutputs): # inpB is not provided so outB not in the command line assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" assert get_output_names(shelly) == [ - "return_code", - "stdout", - "stderr", "outA", "outB", + "return_code", + "stderr", + "stdout", ] @@ -775,19 +781,22 @@ class Outputs(ShellOutputs): # template can be formatted (the same way as for templates that has type=str) inpA = File.mock("inpA") shelly = Shelly(inpA=inpA) - assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" + + inpA_path = Path.cwd() / "inpA" + outA_path = Path.cwd() / "inpA_out" + assert shelly.cmdline == f"executable {inpA_path} -o {outA_path}" # a string is provided for outA, so this should be used as the outA value shelly = Shelly(inpA=inpA, outA="outA") - assert shelly.cmdline == "executable inpA -o outA" + assert shelly.cmdline == f"executable {inpA_path} -o outA" # True is provided for outA, so the formatted template should be used as outA value shelly = Shelly(inpA=inpA, outA=True) - assert shelly.cmdline == f"executable inpA -o {Path.cwd() / 'inpA_out'}" + assert shelly.cmdline == f"executable {inpA_path} -o {outA_path}" # False is provided for outA, so the outA shouldn't be used shelly = Shelly(inpA=inpA, outA=False) - assert shelly.cmdline == "executable inpA" + assert shelly.cmdline == f"executable {inpA_path}" def test_shell_cmd_inputs_template_6a(): @@ -1262,6 +1271,7 @@ class Outputs(ShellOutputs): ) inpB: int = shell.arg( help="inpB", + argstr=None, ) shelly = Shelly( @@ -1279,20 +1289,22 @@ def test_shell_cmd_inputs_denoise_image( """example from #279""" @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): + class DenoiseImage(ShellDef["DenoiseImage.Outputs"]): class Outputs(ShellOutputs): - correctedImage: File | None = shell.outarg( + correctedImage: File = shell.outarg( help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, path_template="{inputImageFilename}_out", + argstr=None, ) noiseImage: File | None = shell.outarg( help=""" The output consists of the noise corrected version of the input image. Optionally, one can also output the estimated noise image. """, path_template="{inputImageFilename}_noise", + argstr=None, ) executable = "executable" @@ -1334,17 +1346,12 @@ class Outputs(ShellOutputs): argstr="-s", ) patch_radius: int = shell.arg( - default=1, - help="Patch radius. Default = 1x1x1", - argstr="-p", + default=1, help="Patch radius. Default = 1x1x1", argstr="-p", position=2 ) search_radius: int = shell.arg( - default=2, - help="Search radius. Default = 2x2x2.", - argstr="-r", + default=2, help="Search radius. 
Default = 2x2x2.", argstr="-r", position=3 ) - output: str | None = shell.arg( - default=None, + output: str = shell.arg( help="Combined output", argstr="-o [{correctedImage}, {noiseImage}]", position=-1, @@ -1371,67 +1378,68 @@ class Outputs(ShellOutputs): my_input_file.write_text("content") # no input provided - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", ) with pytest.raises(Exception) as e: - shelly.cmdline + denoise_image.cmdline assert "mandatory" in str(e.value).lower() # input file name, noiseImage is not set, so using default value False - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, ) assert ( - shelly.cmdline + denoise_image.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # input file name, noiseImage is set to True, so template is used in the output - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, noiseImage=True, ) assert ( - shelly.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " + denoise_image.cmdline + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " f"-o [{Path.cwd() / 'a_file_out.ext'}, {str(Path.cwd() / 'a_file_noise.ext')}]" ) # input file name and help_short - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, help_short=True, ) assert ( - shelly.cmdline + denoise_image.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{Path.cwd() / 'a_file_out.ext'}]" ) - assert get_output_names(shelly) == [ + assert get_output_names(denoise_image) == [ "return_code", - "stdout", "stderr", + "stdout", "correctedImage", "noiseImage", ] # adding image_dimensionality that has allowed_values [2, 3, 4] - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, image_dimensionality=2, ) assert ( - shelly.cmdline + denoise_image.cmdline == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{Path.cwd() / 'a_file_out.ext'}]" ) # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised with pytest.raises(ValueError) as excinfo: - shelly = Shelly( + denoise_image = DenoiseImage( executable="DenoiseImage", inputImageFilename=my_input_file, image_dimensionality=5, diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index d7aa09309d..f757c1a23e 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1036,9 +1036,26 @@ def label_str(self): get_args = staticmethod(get_args) -def is_union(type_: type) -> bool: - """Checks whether a type is a Union, in either ty.Union[T, U] or T | U form""" - return ty.get_origin(type_) in UNION_TYPES +def is_union(type_: type, args: list[type] = None) -> bool: + """Checks whether a type is a Union, in either ty.Union[T, U] or T | U form + + Parameters + ---------- + type_ : type + the type to check + args : list[type], optional + required arguments of the union to check, by default (None) any args will match + + Returns + ------- + is_union : bool + whether the type is a Union type + """ + if ty.get_origin(type_) in UNION_TYPES: + if args is not None: + return ty.get_args(type_) == args + return True + return False def is_optional(type_: type) -> bool: From 090baa515d806f2d8947a15061d5c5cdf0032dc8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 11:27:54 +1100 Subject: [PATCH 
310/342] debugged 'xor' arg attribute --- pydra/engine/specs.py | 11 ++---- .../engine/tests/test_shelltask_inputspec.py | 37 +++++++++---------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 2c336547ec..7c9bdd4912 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -504,17 +504,14 @@ def _rule_violations(self) -> list[str]: # Collect alternative fields associated with this field. if field.xor: mutually_exclusive = {name: self[name] for name in field.xor} - are_set = [ - f"{n}={v!r}" for n, v in mutually_exclusive.items() if is_set(v) - ] + are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] if len(are_set) > 1: errors.append( - f"Mutually exclusive fields {field.xor} are set together: " - + ", ".join(are_set) + f"Mutually exclusive fields ({', '.join(are_set)}) are set together" ) - elif field.mandatory and not are_set: + elif not are_set: errors.append( - f"At least one of the mutually exclusive fields {field.xor} " + f"At least one of the mutually exclusive fields ({', '.join(field.xor)}) " f"should be set" ) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index af871b6269..94875e1085 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1453,16 +1453,19 @@ class Outputs(ShellOutputs): @shell.define class SimpleXor(ShellDef["SimpleTaskXor.Outputs"]): - input_1: str = shell.arg( + input_1: str | None = shell.arg( + default=None, help="help", xor=("input_1", "input_2", "input_3"), ) - input_2: bool = shell.arg( + input_2: bool | None = shell.arg( + default=None, help="help", argstr="--i2", xor=("input_1", "input_2", "input_3"), ) - input_3: bool = shell.arg( + input_3: bool | None = shell.arg( + default=None, help="help", xor=("input_1", "input_2", "input_3"), ) @@ -1478,15 +1481,12 @@ def test_task_inputs_mandatory_with_xOR_one_mandatory_is_OK(): """input definition with mandatory inputs""" simple_xor = SimpleXor() simple_xor.input_1 = "Input1" - simple_xor.input_2 = attrs.NOTHING simple_xor._check_rules() def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): """input definition with mandatory inputs""" simple_xor = SimpleXor() - simple_xor.input_1 = attrs.NOTHING - simple_xor.input_2 = attrs.NOTHING simple_xor.input_3 = True simple_xor._check_rules() @@ -1494,13 +1494,11 @@ def test_task_inputs_mandatory_with_xOR_one_mandatory_out_3_is_OK(): def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): """input definition with mandatory inputs""" simple_xor = SimpleXor() - simple_xor.input_1 = attrs.NOTHING - simple_xor.input_2 = attrs.NOTHING - with pytest.raises(Exception) as excinfo: + simple_xor.input_2 = False + with pytest.raises( + ValueError, match="At least one of the mutually exclusive fields" + ): simple_xor._check_rules() - assert "input_1 is mandatory" in str(excinfo.value) - assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) - assert excinfo.type is AttributeError def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): @@ -1509,10 +1507,10 @@ def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): simple_xor.input_1 = "Input1" simple_xor.input_2 = True - with pytest.raises(Exception) as excinfo: + with pytest.raises( + ValueError, match="Mutually exclusive fields .* are set together" + ): simple_xor._check_rules() - assert "input_1 is mutually exclusive with 
['input_2']" in str(excinfo.value) - assert excinfo.type is AttributeError def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): @@ -1522,9 +1520,8 @@ def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): simple_xor.input_2 = True simple_xor.input_3 = False - with pytest.raises(Exception) as excinfo: + with pytest.raises( + ValueError, + match=r".*Mutually exclusive fields \(input_1='Input1', input_2=True\) are set together", + ): simple_xor._check_rules() - assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( - excinfo.value - ) - assert excinfo.type is AttributeError From af65aab4ddc716a8ed15a93b318e2752e4f399e0 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 11:31:33 +1100 Subject: [PATCH 311/342] fixed up 'xor' arg functionality --- pydra/engine/specs.py | 4 ++-- pydra/engine/tests/test_shelltask_inputspec.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 7c9bdd4912..4b4fbc3425 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -511,8 +511,8 @@ def _rule_violations(self) -> list[str]: ) elif not are_set: errors.append( - f"At least one of the mutually exclusive fields ({', '.join(field.xor)}) " - f"should be set" + "At least one of the mutually exclusive fields should be set: " + + ", ".join(f"{n}={v!r}" for n, v in mutually_exclusive.items()) ) # Raise error if any required field is unset. diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 94875e1085..4c5053ecf1 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1496,7 +1496,7 @@ def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): simple_xor = SimpleXor() simple_xor.input_2 = False with pytest.raises( - ValueError, match="At least one of the mutually exclusive fields" + ValueError, match="At least one of the mutually exclusive fields should be set:" ): simple_xor._check_rules() From 4a7708126da0ef431d72e0cc3c3d82dea8015ff1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 12:00:03 +1100 Subject: [PATCH 312/342] debugged test_shelltask_inputspec --- pydra/design/shell.py | 16 ++-- pydra/engine/helpers_file.py | 90 ++++++++----------- .../engine/tests/test_shelltask_inputspec.py | 11 ++- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 7eb99d0508..667564e36b 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -236,7 +236,7 @@ class outarg(arg, Out): """ path_template: str | None = attrs.field(default=None) - keep_extension: bool = attrs.field(default=False) + keep_extension: bool = attrs.field(default=True) @path_template.validator def _validate_path_template(self, attribute, value): @@ -256,13 +256,13 @@ def _validate_path_template(self, attribute, value): # f"path_template ({value!r}) can only be provided when argstr is not None" # ) - @keep_extension.validator - def _validate_keep_extension(self, attribute, value): - if value and self.path_template is None: - raise ValueError( - f"keep_extension ({value!r}) can only be provided when path_template " - f"is provided" - ) + # @keep_extension.validator + # def _validate_keep_extension(self, attribute, value): + # if value and self.path_template is None: + # raise ValueError( + # f"keep_extension ({value!r}) can only be provided when path_template " + # f"is provided" + # ) @dataclass_transform( 
diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 0b19b075a5..8234975c80 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -8,12 +8,11 @@ from copy import copy import subprocess as sp from contextlib import contextmanager -import attr from fileformats.generic import FileSet from pydra.engine.helpers import is_lazy, attrs_values, list_fields if ty.TYPE_CHECKING: - from pydra.engine.specs import TaskDef + from pydra.engine.specs import ShellDef from pydra.design import shell logger = logging.getLogger("pydra") @@ -122,9 +121,9 @@ def template_update( """ - inputs_dict_st = attrs_values(definition) + values = attrs_values(definition) if map_copyfiles is not None: - inputs_dict_st.update(map_copyfiles) + values.update(map_copyfiles) from pydra.design import shell @@ -143,7 +142,7 @@ def template_update( dict_mod[fld.name] = template_update_single( field=fld, definition=definition, - input_values=inputs_dict_st, + values=values, output_dir=output_dir, ) # adding elements from map_copyfiles to fields with templates @@ -153,9 +152,9 @@ def template_update( def template_update_single( - field, - definition, - input_values: dict[str, ty.Any] = None, + field: "shell.outarg", + definition: "ShellDef", + values: dict[str, ty.Any] = None, output_dir: Path | None = None, spec_type: str = "input", ) -> Path | None: @@ -167,17 +166,17 @@ def template_update_single( # the dictionary will be created from inputs object from pydra.utils.typing import TypeParser, OUTPUT_TEMPLATE_TYPES # noqa - if input_values is None: - input_values = attrs_values(definition) + if values is None: + values = attrs_values(definition) if spec_type == "input": - inp_val_set = input_values[field.name] - if isinstance(inp_val_set, bool) and field.type in (Path, str): + field_value = values[field.name] + if isinstance(field_value, bool) and field.type in (Path, str): raise TypeError( f"type of '{field.name}' is Path, consider using Union[Path, bool]" ) - if inp_val_set is not None and not is_lazy(inp_val_set): - inp_val_set = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(inp_val_set) + if field_value is not None and not is_lazy(field_value): + field_value = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(field_value) elif spec_type == "output": if not TypeParser.contains_type(FileSet, field.type): raise TypeError( @@ -188,13 +187,13 @@ def template_update_single( raise TypeError(f"spec_type can be input or output, but {spec_type} provided") # for inputs that the value is set (so the template is ignored) if spec_type == "input": - if isinstance(inp_val_set, (Path, list)): - return inp_val_set - if inp_val_set is False: + if isinstance(field_value, (Path, list)): + return field_value + if field_value is False: # if input fld is set to False, the fld shouldn't be used (setting NOTHING) return None # inputs_dict[field.name] is True or spec_type is output - value = _template_formatting(field, definition, input_values) + value = _template_formatting(field, definition, values) # changing path so it is in the output_dir if output_dir and value is not None: # should be converted to str, it is also used for input fields that should be str @@ -206,7 +205,7 @@ def template_update_single( return None -def _template_formatting(field, definition, input_values): +def _template_formatting(field, definition, values): """Formatting the field template based on the values from inputs. 
Taking into account that the field with a template can be a MultiOutputFile and the field values needed in the template can be a list - @@ -218,9 +217,9 @@ def _template_formatting(field, definition, input_values): ---------- field : pydra.engine.helpers.Field field with a template - inputs : pydra.engine.helpers.Input - inputs object - inputs_dict_st : dict + definition : pydra.engine.specs.TaskDef + the task definition + values : dict dictionary with values from inputs object Returns @@ -235,23 +234,17 @@ def _template_formatting(field, definition, input_values): # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): - formatted = [ - _single_template_formatting(field, t, definition, input_values) - for t in template - ] + formatted = [_single_template_formatting(field, t, values) for t in template] else: assert isinstance(template, str) - formatted = _single_template_formatting( - field, template, definition, input_values - ) + formatted = _single_template_formatting(field, template, values) return formatted def _single_template_formatting( field: "shell.outarg", template: str, - definition: "TaskDef", - input_values: dict[str, ty.Any], + values: dict[str, ty.Any], ) -> Path | None: from pydra.utils.typing import MultiInputObj, MultiOutputFile @@ -274,29 +267,24 @@ def _single_template_formatting( for fld in inp_fields: fld_name = fld[1:-1] # extracting the name form {field_name} - if fld_name not in input_values: + if fld_name not in values: raise AttributeError(f"{fld_name} is not provided in the input") - fld_value = input_values[fld_name] - if isinstance(fld_value, Path): # Remove path - fld_value = fld_value.name - if fld_value is attr.NOTHING: + fld_value = values[fld_name] + if fld_value is None: # if value is NOTHING, nothing should be added to the command - return attr.NOTHING - else: - # checking for fields that can be treated as a file: - # have type File, or value that is path like (including str with extensions) - if isinstance(fld_value, os.PathLike) or ( - isinstance(fld_value, str) and "." in fld_value - ): - if file_template: - raise Exception( - f"can't have multiple paths in {field.name} template," - f" but {template} provided" - ) - else: - file_template = (fld_name, fld_value) + return None + # checking for fields that can be treated as a file: + # have type File, or value that is path like (including str with extensions) + if isinstance(fld_value, os.PathLike): + if file_template: + raise Exception( + f"can't have multiple paths in {field.name} template," + f" but {template} provided" + ) else: - val_dict[fld_name] = fld_value + file_template = (fld_name, fld_value) + else: + val_dict[fld_name] = fld_value # if field is MultiOutputFile and some elements from val_dict are lists, # each element of the list should be used separately in the template diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 4c5053ecf1..bd10577e0f 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1319,10 +1319,12 @@ class Outputs(ShellOutputs): allowed_values=[2, 3, 4, None], default=None, argstr="-d", + position=1, ) inputImageFilename: File = shell.arg( help="A scalar image is expected as input for noise correction.", argstr="-i", + position=2, ) noise_model: str | None = shell.arg( default=None, @@ -1344,12 +1346,13 @@ class Outputs(ShellOutputs): The shrink factor, specified as a single integer, describes this resampling. 
Shrink factor = 1 is the default. """, argstr="-s", + position=3, ) patch_radius: int = shell.arg( - default=1, help="Patch radius. Default = 1x1x1", argstr="-p", position=2 + default=1, help="Patch radius. Default = 1x1x1", argstr="-p", position=4 ) search_radius: int = shell.arg( - default=2, help="Search radius. Default = 2x2x2.", argstr="-r", position=3 + default=2, help="Search radius. Default = 2x2x2.", argstr="-r", position=5 ) output: str = shell.arg( help="Combined output", @@ -1419,11 +1422,11 @@ class Outputs(ShellOutputs): ) assert get_output_names(denoise_image) == [ + "correctedImage", + "noiseImage", "return_code", "stderr", "stdout", - "correctedImage", - "noiseImage", ] # adding image_dimensionality that has allowed_values [2, 3, 4] From 7f1b259d4acc7b407ea475a1a74a8aa355168170 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 13:04:49 +1100 Subject: [PATCH 313/342] fixed up issue with optional xor --- pydra/design/base.py | 19 +++++++++++++------ pydra/design/boutiques.py | 6 ++++-- pydra/design/python.py | 6 ++++-- pydra/design/shell.py | 12 ++++++++---- pydra/design/tests/test_shell.py | 8 ++++---- pydra/design/workflow.py | 6 ++++-- pydra/engine/specs.py | 6 +++--- pydra/engine/tests/test_helpers_file.py | 9 +++++---- .../engine/tests/test_shelltask_inputspec.py | 2 +- pydra/engine/tests/test_task.py | 4 ++-- 10 files changed, 48 insertions(+), 30 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index d25301d263..3845c2bb65 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -244,12 +244,14 @@ class Arg(Field): the default value for the field, by default it is NO_DEFAULT help: str A short description of the input field. - allowed_values: list, optional - List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list[str], optional - Names of the inputs that are mutually exclusive with the field. + allowed_values: Sequence, optional + List of allowed values for the field. + xor: Sequence[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. 
copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -263,8 +265,8 @@ class Arg(Field): it is False """ - allowed_values: tuple = attrs.field(default=(), converter=tuple) - xor: tuple[str] = attrs.field(default=(), converter=tuple) + allowed_values: frozenset = attrs.field(default=(), converter=frozenset) + xor: frozenset[str | None] = attrs.field(default=(), converter=frozenset) copy_mode: File.CopyMode = File.CopyMode.any copy_collation: File.CopyCollation = File.CopyCollation.any copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single @@ -272,6 +274,11 @@ class Arg(Field): @xor.validator def _xor_validator(self, _, value): + for v in value: + if not isinstance(v, (str, type(None))): + raise ValueError( + f"xor values must be strings or None, not {v} ({self!r})" + ) if value and self.type not in (ty.Any, bool) and not is_optional(self.type): raise ValueError( f"Fields that have 'xor' must be of boolean or optional type, " diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 334552b5b1..02bfc244fe 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -28,8 +28,10 @@ class arg(shell.arg): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list[str], optional - Names of the inputs that are mutually exclusive with the field. + xor: list[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional diff --git a/pydra/design/python.py b/pydra/design/python.py index 095404f41b..aba8343cc9 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -33,8 +33,10 @@ class arg(Arg): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list, optional - Names of the inputs that are mutually exclusive with the field. + xor: list[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 667564e36b..03c88a9362 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -56,8 +56,10 @@ class arg(Arg): List of allowed values for the field. requires: list, optional List of field names that are required together with the field. - xor: list, optional - List of field names that are mutually exclusive with the field. + xor: list[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -192,8 +194,10 @@ class outarg(arg, Out): List of allowed values for the field. requires: list, optional List of field names that are required together with the field. 
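As a rough illustration of how an output argument with a path template might be declared once this series lands (the task, executable and field names below are invented, and the placement of the outarg field is assumed to follow the test definitions used elsewhere in the series):

from fileformats.generic import File
from pydra.design import shell
from pydra.engine.specs import ShellDef, ShellOutputs


@shell.define
class Smooth(ShellDef["Smooth.Outputs"]):
    """Hypothetical task, used only to sketch the outarg field."""

    executable = "smooth"

    in_file: File = shell.arg(argstr="-i", position=1, help="image to smooth")

    class Outputs(ShellOutputs):
        out_file: File = shell.outarg(
            argstr="-o",
            position=2,
            # "{in_file}" is filled in from the in_file input; with keep_extension
            # now defaulting to True, the extension of in_file should be carried
            # over onto the generated filename
            path_template="{in_file}_smoothed",
            help="smoothed output image",
        )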
- xor: list, optional - List of field names that are mutually exclusive with the field. + xor: list[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 4c2c2f91bf..e760a6ba29 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -502,14 +502,14 @@ class Ls(ShellDef["Ls.Outputs"]): argstr="-T", default=False, requires=["long_format"], - xor=["date_format_str"], + xor=["complete_date", "date_format_str", None], ) date_format_str: str | None = shell.arg( help="format string for ", argstr="-D", default=None, requires=["long_format"], - xor=["complete_date"], + xor=["complete_date", "date_format_str", None], ) @shell.outputs @@ -557,7 +557,7 @@ class Outputs(ShellOutputs): argstr="-T", default=False, requires=["long_format"], - xor=["date_format_str"], + xor=["complete_date", "date_format_str", None], ), "date_format_str": shell.arg( type=str | None, @@ -565,7 +565,7 @@ class Outputs(ShellOutputs): default=None, argstr="-D", requires=["long_format"], - xor=["complete_date"], + xor=["date_format_str", "complete_date", None], ), }, outputs={ diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 119cd95918..9424e234aa 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -37,8 +37,10 @@ class arg(Arg): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list, optional - Names of the inputs that are mutually exclusive with the field. + xor: list[str | None], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 4b4fbc3425..35ea78da60 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -503,13 +503,13 @@ def _rule_violations(self) -> list[str]: # Collect alternative fields associated with this field. 
if field.xor: - mutually_exclusive = {name: self[name] for name in field.xor} + mutually_exclusive = {name: self[name] for name in field.xor if name} are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] if len(are_set) > 1: errors.append( f"Mutually exclusive fields ({', '.join(are_set)}) are set together" ) - elif not are_set: + elif not are_set and None not in field.xor: errors.append( "At least one of the mutually exclusive fields should be set: " + ", ".join(f"{n}={v!r}" for n, v in mutually_exclusive.items()) @@ -568,7 +568,7 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: f"of {field} " + str(list(unrecognised)) ) for inpt in inputs.values(): - if unrecognised := set(inpt.xor) - input_names: + if unrecognised := inpt.xor - (input_names | {None}): raise ValueError( "'Unrecognised' field names in referenced in the xor " f"of {inpt} " + str(list(unrecognised)) diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 3b15e5bfd2..f940a6f398 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -393,13 +393,14 @@ def test_template_formatting(tmp_path: Path): field.name = "grad" field.argstr = "--grad" field.path_template = ("{in_file}.bvec", "{in_file}.bval") - inputs = Mock() - inputs_dict = {"in_file": "/a/b/c/file.txt", "grad": True} + field.keep_extension = False + definition = Mock() + values = {"in_file": Path("/a/b/c/file.txt"), "grad": True} assert template_update_single( field, - inputs, - input_values=inputs_dict, + definition, + values=values, output_dir=tmp_path, spec_type="input", ) == [tmp_path / "file.bvec", tmp_path / "file.bval"] diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index bd10577e0f..595e6504dd 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1125,7 +1125,7 @@ class Outputs(ShellOutputs): help="inpA", argstr="", ) - inpStr: str = shell.arg( + inpStr: Path = shell.arg( position=2, help="inp str with extension", argstr="-i", diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 153d0a3e9e..6a2aa9bc16 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -1449,11 +1449,11 @@ class Defn(ShellDef["Defn.Outputs"]): class Outputs(ShellOutputs): pass - inputs = Defn(a1_field="1", b2_field=2.0, c3_field={"c": "3"}, d4_field=["4"]) + values = dict(a1_field="1", b2_field=2.0, c3_field={"c": "3"}, d4_field=["4"]) assert ( argstr_formatting( "{a1_field} {b2_field:02f} -test {c3_field[c]} -me {d4_field[0]}", - inputs, + values, ) == "1 2.000000 -test 3 -me 4" ) From 26645097d20ff24d076f46d8f75f80433b1f0e90 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 13:34:35 +1100 Subject: [PATCH 314/342] handle empty state arrays, i.e. 
nodes that don't run any jobs --- pydra/engine/submitter.py | 27 +++++++++++++++------------ pydra/engine/tests/test_node_task.py | 2 +- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 6858fadf1b..4f25287624 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -503,17 +503,17 @@ class NodeExecution(ty.Generic[DefType]): submitter: Submitter # List of tasks that were completed successfully - successful: dict[StateIndex | None, list["Task[DefType]"]] + successful: dict[StateIndex, list["Task[DefType]"]] # List of tasks that failed - errored: dict[StateIndex | None, "Task[DefType]"] + errored: dict[StateIndex, "Task[DefType]"] # List of tasks that couldn't be run due to upstream errors - unrunnable: dict[StateIndex | None, list["Task[DefType]"]] + unrunnable: dict[StateIndex, list["Task[DefType]"]] # List of tasks that are queued - queued: dict[StateIndex | None, "Task[DefType]"] + queued: dict[StateIndex, "Task[DefType]"] # List of tasks that are queued - running: dict[StateIndex | None, tuple["Task[DefType]", datetime]] + running: dict[StateIndex, tuple["Task[DefType]", datetime]] # List of tasks that are blocked on other tasks to complete before they can be run - blocked: dict[StateIndex | None, "Task[DefType]"] + blocked: dict[StateIndex, "Task[DefType]"] | None _tasks: dict[StateIndex | None, "Task[DefType]"] | None @@ -532,7 +532,7 @@ def __init__( self.submitter = submitter # Initialize the state dictionaries self._tasks = None - self.blocked = {} + self.blocked = None self.successful = {} self.errored = {} self.queued = {} @@ -568,10 +568,13 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def task(self, index: StateIndex = StateIndex()) -> "Task | list[Task[DefType]]": + def task( + self, index: StateIndex = StateIndex() + ) -> "Task | StateArray[Task[DefType]]": """Get a task object for a given state index.""" - self.tasks # Ensure tasks are loaded - task_index = next(iter(self._tasks)) + if not self.tasks: + return StateArray([]) + task_index = next(iter(self._tasks)) if self._tasks else StateIndex() if len(task_index) > len(index): tasks = [] for ind, task in self._tasks.items(): @@ -589,7 +592,7 @@ def started(self) -> bool: or self.errored or self.unrunnable or self.queued - or self.blocked + or self.blocked is not None ) @property @@ -730,7 +733,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: runnable: list["Task[DefType]"] = [] self.tasks # Ensure tasks are loaded if not self.started: - assert self._tasks + assert self._tasks is not None self.blocked = copy(self._tasks) # Check to see if any blocked tasks are now runnable/unrunnable for index, task in list(self.blocked.items()): diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 1abfc1d96c..dba7360d11 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -887,7 +887,7 @@ def test_task_state_3(plugin, tmp_path): assert state.splitter_rpn == ["NA.a"] assert nn.a == [] - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(worker="debug", cache_dir=tmp_path) as sub: results = sub(nn) assert not results.errored, "\n".join(results.errors["error message"]) From a52b748118e5ea2b5d37fea3c96c913d7da7c0db Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 6 Mar 2025 15:35:32 +1100 Subject: [PATCH 315/342] 
renamed NodeExecution.task to NodeExecution.get_tasks --- pydra/engine/lazy.py | 2 +- pydra/engine/submitter.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index fd1f628a24..58dd5dae4a 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -160,7 +160,7 @@ def _get_value( if state_index is None: state_index = StateIndex() - task = graph.node(self._node.name).task(state_index) + task = graph.node(self._node.name).get_tasks(state_index) _, split_depth = TypeParser.strip_splits(self._type) def get_nested(task: "Task[DefType]", depth: int): diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 4f25287624..d3016405f5 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -568,7 +568,7 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def task( + def get_tasks( self, index: StateIndex = StateIndex() ) -> "Task | StateArray[Task[DefType]]": """Get a task object for a given state index.""" @@ -740,7 +740,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: - pred_jobs = pred.task(index) + pred_jobs = pred.get_tasks(index) if isinstance(pred_jobs, StateArray): pred_inds = [j.state_index for j in pred_jobs] else: From 9df11910b81c08d0b7cd199f51bdf2d5cc408e2d Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 17:45:22 +1100 Subject: [PATCH 316/342] moved xor into *.define decorators from *.arg fields --- pydra/design/base.py | 35 +++++------ pydra/design/boutiques.py | 4 -- pydra/design/python.py | 10 ++-- pydra/design/shell.py | 14 ++--- pydra/design/tests/test_shell.py | 4 +- pydra/design/workflow.py | 10 ++-- pydra/engine/specs.py | 59 +++++++++++++------ pydra/engine/tests/test_shelltask.py | 39 +++++------- .../engine/tests/test_shelltask_inputspec.py | 5 +- 9 files changed, 90 insertions(+), 90 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 3845c2bb65..6cafa4ae14 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -248,10 +248,6 @@ class Arg(Field): Names of the inputs that are required together with the field. allowed_values: Sequence, optional List of allowed values for the field. - xor: Sequence[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. 
copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -266,25 +262,11 @@ class Arg(Field): """ allowed_values: frozenset = attrs.field(default=(), converter=frozenset) - xor: frozenset[str | None] = attrs.field(default=(), converter=frozenset) copy_mode: File.CopyMode = File.CopyMode.any copy_collation: File.CopyCollation = File.CopyCollation.any copy_ext_decomp: File.ExtensionDecomposition = File.ExtensionDecomposition.single readonly: bool = False - @xor.validator - def _xor_validator(self, _, value): - for v in value: - if not isinstance(v, (str, type(None))): - raise ValueError( - f"xor values must be strings or None, not {v} ({self!r})" - ) - if value and self.type not in (ty.Any, bool) and not is_optional(self.type): - raise ValueError( - f"Fields that have 'xor' must be of boolean or optional type, " - f"not type {self.type} ({self!r})" - ) - @attrs.define(kw_only=True, slots=False) class Out(Field): @@ -418,6 +400,7 @@ def make_task_def( name: str | None = None, bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), ): """Create a task definition class and its outputs definition class from the input and output fields provided to the decorator/function. @@ -442,6 +425,10 @@ def make_task_def( The base classes for the task definition class, by default () outputs_bases : ty.Sequence[type], optional The base classes for the outputs definition class, by default () + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. Returns ------- @@ -449,7 +436,15 @@ def make_task_def( The class created using the attrs package """ - spec_type._check_arg_refs(inputs, outputs) + # Convert a single xor set into a set of xor sets + if not xor: + xor = frozenset() + elif all(isinstance(x, str) or x is None for x in xor): + xor = frozenset([frozenset(xor)]) + else: + xor = frozenset(frozenset(x) for x in xor) + + spec_type._check_arg_refs(inputs, outputs, xor) # Check that the field attributes are valid after all fields have been set # (especially the type) @@ -521,6 +516,8 @@ def make_task_def( **attrs_kwargs, ), ) + # Store the xor sets for the class + klass._xor = xor klass.__annotations__[arg.name] = field_type # Create class using attrs package, will create attributes for all columns and diff --git a/pydra/design/boutiques.py b/pydra/design/boutiques.py index 02bfc244fe..4fd8d43760 100644 --- a/pydra/design/boutiques.py +++ b/pydra/design/boutiques.py @@ -28,10 +28,6 @@ class arg(shell.arg): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional diff --git a/pydra/design/python.py b/pydra/design/python.py index aba8343cc9..322a55a923 100644 --- a/pydra/design/python.py +++ b/pydra/design/python.py @@ -33,10 +33,6 @@ class arg(Arg): List of allowed values for the field. 
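A rough usage sketch for the function-task flavour (the function and parameter names are invented, and it is assumed here that python.define accepts the same xor argument that this commit adds to the other define decorators):

import math

from pydra.design import python


@python.define(xor=["radius", "diameter"])  # exactly one of the two must be provided
def CircleArea(radius: float | None = None, diameter: float | None = None) -> float:
    # fields referenced in an xor group have to be boolean or optional types,
    # hence the "| None" annotations and the None defaults
    if diameter is not None:
        radius = diameter / 2
    return math.pi * radius**2

A flat sequence such as the one above defines a single exclusion group, while a sequence of sequences (e.g. xor=[["a", "b"], ["c", "d"]]) defines several independent groups; including None in a group makes it valid to leave all of its fields unset.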
requires: list, optional Names of the inputs that are required together with the field. - xor: list[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -106,6 +102,7 @@ def define( bases: ty.Sequence[type] = (), outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), ) -> "PythonDef": """ Create an interface for a function or a class. @@ -120,6 +117,10 @@ def define( The outputs of the function or class. auto_attribs : bool Whether to use auto_attribs mode when creating the class. + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. Returns ------- @@ -172,6 +173,7 @@ def make(wrapped: ty.Callable | type) -> PythonDef: klass=klass, bases=bases, outputs_bases=outputs_bases, + xor=xor, ) return defn diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 03c88a9362..4a53208e23 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -56,10 +56,6 @@ class arg(Arg): List of allowed values for the field. requires: list, optional List of field names that are required together with the field. - xor: list[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -194,10 +190,6 @@ class outarg(arg, Out): List of allowed values for the field. requires: list, optional List of field names that are required together with the field. - xor: list[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -291,6 +283,7 @@ def define( outputs_bases: ty.Sequence[type] = (), auto_attribs: bool = True, name: str | None = None, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), ) -> "ShellDef": """Create a task definition for a shell command. Can be used either as a decorator on the "canonical" dataclass-form of a task definition or as a function that takes a @@ -337,6 +330,10 @@ def define( as they appear in the template name: str | None The name of the returned class + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. 
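For the shell flavour the decorator-level form reads roughly as follows; the task below is hypothetical (command and field names invented) but mirrors the -t/-S style definitions used in the test suite updated later in this series:

from pydra.design import shell
from pydra.engine.specs import ShellDef, ShellOutputs


@shell.define(xor=["crc32", "md5", None])  # None => it is also valid to set neither flag
class Checksum(ShellDef["Checksum.Outputs"]):
    executable = "checksum"  # made-up command

    in_file: str = shell.arg(position=1, argstr="", help="file to checksum")
    crc32: bool = shell.arg(default=False, argstr="--crc32", help="use a CRC32 checksum")
    md5: bool = shell.arg(default=False, argstr="--md5", help="use an MD5 checksum")

    class Outputs(ShellOutputs):
        pass

Setting both flags and then calling _check_rules() is expected to raise a ValueError along the lines of "Mutually exclusive fields (crc32=True, md5=True) are set together".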
Returns ------- @@ -446,6 +443,7 @@ def make( klass=klass, bases=bases, outputs_bases=outputs_bases, + xor=xor, ) return defn diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index e760a6ba29..7269166ad1 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -469,7 +469,7 @@ def test_interface_template_with_type_overrides(): def Ls(request): if request.param == "static": - @shell.define + @shell.define(xor=["complete_date", "date_format_str", None]) class Ls(ShellDef["Ls.Outputs"]): executable = "ls" @@ -502,14 +502,12 @@ class Ls(ShellDef["Ls.Outputs"]): argstr="-T", default=False, requires=["long_format"], - xor=["complete_date", "date_format_str", None], ) date_format_str: str | None = shell.arg( help="format string for ", argstr="-D", default=None, requires=["long_format"], - xor=["complete_date", "date_format_str", None], ) @shell.outputs diff --git a/pydra/design/workflow.py b/pydra/design/workflow.py index 9424e234aa..c1be87298f 100644 --- a/pydra/design/workflow.py +++ b/pydra/design/workflow.py @@ -37,10 +37,6 @@ class arg(Arg): List of allowed values for the field. requires: list, optional Names of the inputs that are required together with the field. - xor: list[str | None], optional - Names of args that are exclusive mutually exclusive, which must include - the name of the current field. If this list includes None, then none of the - fields need to be set. copy_mode: File.CopyMode, optional The mode of copying the file, by default it is File.CopyMode.any copy_collation: File.CopyCollation, optional @@ -111,6 +107,7 @@ def define( outputs_bases: ty.Sequence[type] = (), lazy: list[str] | None = None, auto_attribs: bool = True, + xor: ty.Sequence[str | None] | ty.Sequence[ty.Sequence[str | None]] = (), ) -> "WorkflowDef": """ Create an interface for a function or a class. Can be used either as a decorator on @@ -126,6 +123,10 @@ def define( The outputs of the function or class. auto_attribs : bool Whether to use auto_attribs mode when creating the class. + xor: Sequence[str | None] | Sequence[Sequence[str | None]], optional + Names of args that are exclusive mutually exclusive, which must include + the name of the current field. If this list includes None, then none of the + fields need to be set. Returns ------- @@ -183,6 +184,7 @@ def make(wrapped: ty.Callable | type) -> TaskDef: klass=klass, bases=bases, outputs_bases=outputs_bases, + xor=xor, ) return defn diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 35ea78da60..c258f9735e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -173,6 +173,11 @@ def reset(self): class TaskDef(ty.Generic[OutputsType]): """Base class for all task definitions""" + # Class attributes + _xor: frozenset[frozenset[str | None]] = ( + frozenset() + ) # overwritten in derived classes + # The following fields are used to store split/combine state information _splitter = attrs.field(default=None, init=False, repr=False) _combiner = attrs.field(default=None, init=False, repr=False) @@ -501,20 +506,6 @@ def _rule_violations(self) -> list[str]: ): errors.append(f"Mandatory field {field.name!r} is not set") - # Collect alternative fields associated with this field. 
- if field.xor: - mutually_exclusive = {name: self[name] for name in field.xor if name} - are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] - if len(are_set) > 1: - errors.append( - f"Mutually exclusive fields ({', '.join(are_set)}) are set together" - ) - elif not are_set and None not in field.xor: - errors.append( - "At least one of the mutually exclusive fields should be set: " - + ", ".join(f"{n}={v!r}" for n, v in mutually_exclusive.items()) - ) - # Raise error if any required field is unset. if ( not ( @@ -538,6 +529,19 @@ def _rule_violations(self) -> list[str]: errors.append( f"{field.name!r} requires{qualification} {[str(r) for r in field.requires]}" ) + # Collect alternative fields associated with this field. + for xor_set in self._xor: + mutually_exclusive = {name: self[name] for name in xor_set if name} + are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] + if len(are_set) > 1: + errors.append( + f"Mutually exclusive fields ({', '.join(are_set)}) are set together" + ) + elif not are_set and None not in xor_set: + errors.append( + "At least one of the mutually exclusive fields should be set: " + + ", ".join(f"{n}={v!r}" for n, v in mutually_exclusive.items()) + ) return errors def _check_rules(self): @@ -552,7 +556,12 @@ def _check_rules(self): ) @classmethod - def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: + def _check_arg_refs( + cls, + inputs: list[Arg], + outputs: list[Out], + xor: frozenset[frozenset[str | None]], + ) -> None: """ Checks if all fields referenced in requirements and xor are present in the inputs are valid field names @@ -567,12 +576,22 @@ def _check_arg_refs(cls, inputs: list[Arg], outputs: list[Out]) -> None: "'Unrecognised' field names in referenced in the requirements " f"of {field} " + str(list(unrecognised)) ) - for inpt in inputs.values(): - if unrecognised := inpt.xor - (input_names | {None}): + + for xor_set in xor: + if unrecognised := xor_set - (input_names | {None}): raise ValueError( - "'Unrecognised' field names in referenced in the xor " - f"of {inpt} " + str(list(unrecognised)) + f"'Unrecognised' field names in referenced in the xor {xor_set} " + + str(list(unrecognised)) ) + for field_name in xor_set: + if field_name is None: # i.e. 
none of the fields being set is valid + continue + type_ = inputs[field_name].type + if type_ not in (ty.Any, bool) and not is_optional(type_): + raise ValueError( + f"Fields included in a 'xor' ({field.name!r}) must be of boolean " + f"or optional types, not type {type_}" + ) def _check_resolved(self): """Checks that all the fields in the definition have been resolved""" @@ -762,6 +781,8 @@ def _from_task(cls, task: "Task[WorkflowDef]") -> Self: for name, lazy_field in attrs_values(workflow.outputs).items(): try: val_out = lazy_field._get_value(workflow=workflow, graph=exec_graph) + if isinstance(val_out, StateArray): + val_out = list(val_out) # implicitly combine state arrays output_wf[name] = val_out except (ValueError, AttributeError): output_wf[name] = None diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 5d7b99f407..5afe8229f6 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -594,20 +594,18 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): cmd_exec = "ls" cmd_t = True - @shell.define + @shell.define(xor=["opt_S", "opt_t"]) class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd_exec opt_t: bool = shell.arg( position=1, help="opt t", argstr="-t", - xor=["opt_S"], ) opt_S: bool = shell.arg( position=2, help="opt S", argstr="-S", - xor=["opt_t"], ) class Outputs(ShellOutputs): @@ -626,20 +624,18 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): cmd_t = True cmd_S = True - @shell.define + @shell.define(xor=["opt_S", "opt_t"]) class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd_exec opt_t: bool = shell.arg( position=1, help="opt t", argstr="-t", - xor=["opt_S"], ) opt_S: bool = shell.arg( position=2, help="opt S", argstr="-S", - xor=["opt_t"], ) class Outputs(ShellOutputs): @@ -685,7 +681,7 @@ class Outputs(ShellOutputs): results_function(shelly, plugin=plugin, cache_dir=tmp_path) -def test_shell_cmd_inputspec_6a_exception(plugin): +def test_shell_cmd_inputspec_6a_exception(plugin, tmp_path): """checking requires in metadata: the required field is None, so the task works raises exception """ @@ -3066,21 +3062,21 @@ def no_fsl(): def test_fsl(data_tests_dir, tmp_path): """mandatory field added to fields, value provided""" - _xor_inputs = [ - "functional", - "reduce_bias", - "robust", - "padding", - "remove_eyes", - "surfaces", - "t2_guided", - ] - def change_name(file): name, ext = os.path.splitext(file) return f"{name}_brain.{ext}" - @shell.define + @shell.define( + xor=[ + "functional", + "reduce_bias", + "robust", + "padding", + "remove_eyes", + "surfaces", + "t2_guided", + ] + ) class Shelly(ShellDef["Shelly.Outputs"]): executable = "bet" in_file: File = shell.arg( @@ -3131,36 +3127,29 @@ class Shelly(ShellDef["Shelly.Outputs"]): robust: bool = shell.arg( help="robust brain centre estimation (iterates BET several times)", argstr="-R", - xor=_xor_inputs, ) padding: bool = shell.arg( help="improve BET if FOV is very small in Z (by temporarily padding end slices", argstr="-Z", - xor=_xor_inputs, ) remove_eyes: bool = shell.arg( help="eye & optic nerve cleanup (can be useful in SIENA)", argstr="-S", - xor=_xor_inputs, ) surfaces: bool = shell.arg( help="run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", argstr="-A", - xor=_xor_inputs, ) t2_guided: ty.Union[File, str] = shell.arg( help="as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", argstr="-A2", - 
xor=_xor_inputs, ) functional: bool = shell.arg( argstr="-F", - xor=_xor_inputs, help="apply to 4D fMRI data", ) reduce_bias: bool = shell.arg( argstr="-B", - xor=_xor_inputs, help="bias field and neck cleanup", ) diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index 595e6504dd..413469d1f7 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1453,24 +1453,21 @@ class Outputs(ShellOutputs): # tests with XOR in input metadata -@shell.define +@shell.define(xor=("input_1", "input_2", "input_3")) class SimpleXor(ShellDef["SimpleTaskXor.Outputs"]): input_1: str | None = shell.arg( default=None, help="help", - xor=("input_1", "input_2", "input_3"), ) input_2: bool | None = shell.arg( default=None, help="help", argstr="--i2", - xor=("input_1", "input_2", "input_3"), ) input_3: bool | None = shell.arg( default=None, help="help", - xor=("input_1", "input_2", "input_3"), ) @shell.outputs From 1cc40741492bd6519c0ce694fdeed129a769c81f Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 17:46:52 +1100 Subject: [PATCH 317/342] debugging combining states to preserve nested lists over staggered combines --- pydra/engine/core.py | 5 +- pydra/engine/lazy.py | 89 +++++++++++++++------------- pydra/engine/node.py | 8 +-- pydra/engine/state.py | 70 ++++++++++++++++++++-- pydra/engine/submitter.py | 56 ++++++++--------- pydra/engine/tests/test_node_task.py | 17 +----- 6 files changed, 148 insertions(+), 97 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 5619966700..d2fbdcf769 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -22,7 +22,7 @@ from pydra.engine import state from .lazy import LazyInField, LazyOutField from pydra.utils.hash import hash_function, Cache -from pydra.utils.typing import TypeParser, StateArray +from pydra.engine.state import State from .node import Node from datetime import datetime from fileformats.core import FileSet @@ -710,8 +710,7 @@ def construct( ) for outpt, outpt_lf in zip(output_fields, output_lazy_fields): # Automatically combine any uncombined state arrays into a single lists - if TypeParser.get_origin(outpt_lf._type) is StateArray: - outpt_lf._type = list[TypeParser.strip_splits(outpt_lf._type)[0]] + outpt_lf._type = State.combine_state_arrays(outpt_lf._type) setattr(outputs, outpt.name, outpt_lf) else: if unset_outputs := [ diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 58dd5dae4a..ecbaa21111 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -1,5 +1,6 @@ import typing as ty import abc +from operator import attrgetter import attrs from pydra.utils.typing import StateArray from pydra.utils.hash import hash_single @@ -152,54 +153,60 @@ def _get_value( value : Any the resolved value of the lazy-field """ - from pydra.utils.typing import ( - TypeParser, - ) # pylint: disable=import-outside-toplevel from pydra.engine.state import StateIndex if state_index is None: state_index = StateIndex() - task = graph.node(self._node.name).get_tasks(state_index) - _, split_depth = TypeParser.strip_splits(self._type) - - def get_nested(task: "Task[DefType]", depth: int): - if isinstance(task, StateArray): - val = [get_nested(task=t, depth=depth - 1) for t in task] - if depth: - val = StateArray[self._type](val) - else: - if task.errored: - raise ValueError( - f"Cannot retrieve value for {self._field} from {self._node.name} as " - "the node errored" - ) - res = task.result() - if res is None: 
- raise RuntimeError( - f"Could not find results of '{task.name}' node in a sub-directory " - f"named '{{{task.checksum}}}' in any of the cache locations.\n" - + "\n".join(str(p) for p in set(task.cache_locations)) - + f"\n\nThis is likely due to hash changes in '{task.name}' node inputs. " - f"Current values and hashes: {task.inputs}, " - f"{task.definition._hash}\n\n" - "Set loglevel to 'debug' in order to track hash changes " - "throughout the execution of the workflow.\n\n " - "These issues may have been caused by `bytes_repr()` methods " - "that don't return stable hash values for specific object " - "types across multiple processes (see bytes_repr() " - '"singledispatch "function in pydra/utils/hash.py).' - "You may need to write specific `bytes_repr()` " - "implementations (see `pydra.utils.hash.register_serializer`) or a " - "`__bytes_repr__()` dunder methods to handle one or more types in " - "your interface inputs." - ) - val = res.get_output_field(self._field) - val = self._apply_cast(val) + jobs = sorted( + graph.node(self._node.name).matching_jobs(state_index), + key=attrgetter("state_index"), + ) + + def retrieve_from_job(job: "Task[DefType]") -> ty.Any: + if job.errored: + raise ValueError( + f"Cannot retrieve value for {self._field} from {self._node.name} as " + "the node errored" + ) + res = job.result() + if res is None: + raise RuntimeError( + f"Could not find results of '{job.name}' node in a sub-directory " + f"named '{{{job.checksum}}}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(job.cache_locations)) + + f"\n\nThis is likely due to hash changes in '{job.name}' node inputs. " + f"Current values and hashes: {job.inputs}, " + f"{job.definition._hash}\n\n" + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' + "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) or a " + "`__bytes_repr__()` dunder methods to handle one or more types in " + "your interface inputs." + ) + val = res.get_output_field(self._field) + val = self._apply_cast(val) return val - value = get_nested(task, depth=split_depth) - return value + if not self._node.state.depth(after_combine=False): + assert len(jobs) == 1 + return retrieve_from_job(jobs[0]) + elif not self._node.state.keys_final: # all states are combined over + return [retrieve_from_job(j) for j in jobs] + elif self._node.state.combiner: + values = StateArray() + for ind in self._node.state.states_ind_final: + values.append( + [retrieve_from_job(j) for j in jobs if j.state_index.matches(ind)] + ) + return values + else: + return StateArray(retrieve_from_job(j) for j in jobs) @property def _source(self): diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 661418d034..5d3e9b1cef 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -2,7 +2,6 @@ from copy import deepcopy from enum import Enum import attrs -from pydra.utils.typing import TypeParser, StateArray from . 
import lazy from pydra.engine.helpers import ( attrs_values, @@ -128,12 +127,7 @@ def lzout(self) -> OutputType: # types based on the number of states the node is split over and whether # it has a combiner if self._state: - type_, _ = TypeParser.strip_splits(outpt._type) - if self._state.combiner: - type_ = list[type_] - for _ in range(self._state.depth()): - type_ = StateArray[type_] - outpt._type = type_ + outpt._type = self._state.nest_output_type(outpt._type) # Flag the output lazy fields as being not typed checked (i.e. assigned to # another node's inputs) yet. This is used to prevent the user from changing # the type of the output after it has been accessed by connecting it to an diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 201a5783b5..a5f0cba3bf 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -3,10 +3,12 @@ from copy import deepcopy import itertools from collections import OrderedDict +from operator import itemgetter from functools import reduce import typing as ty from . import helpers_state as hlpst from .helpers import ensure_list, attrs_values +from pydra.utils.typing import StateArray, TypeParser # from .specs import BaseDef if ty.TYPE_CHECKING: @@ -47,6 +49,18 @@ def __len__(self) -> int: def __iter__(self) -> ty.Generator[str, None, None]: return iter(self.indices) + def __getitem__(self, key: str) -> int: + return self.indices[key] + + def __lt__(self, other: "StateIndex") -> bool: + if set(self.indices) != set(other.indices): + raise ValueError( + f"StateIndex {self} does not contain the same indices as {other}" + ) + return sorted(self.indices.items(), key=itemgetter(0)) < sorted( + other.indices.items(), key=itemgetter(0) + ) + def __repr__(self) -> str: return ( "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")" @@ -79,6 +93,21 @@ def subset(self, state_names: ty.Iterable[str]) -> ty.Self: """ return type(self)({k: v for k, v in self.indices.items() if k in state_names}) + def missing(self, state_names: ty.Iterable[str]) -> ty.List[str]: + """Return the fields that are missing from the StateIndex + + Parameters + ---------- + fields : list[str] + the fields to check for + + Returns + ------- + list[str] + the fields that are missing from the StateIndex + """ + return [f for f in state_names if f not in self.indices] + def matches(self, other: "StateIndex") -> bool: """Check if the indices that are present in the other StateIndex match @@ -92,6 +121,8 @@ def matches(self, other: "StateIndex") -> bool: bool True if all the indices in the other StateIndex match """ + if isinstance(other, dict): + other = StateIndex(other) if not set(self.indices).issuperset(other.indices): raise ValueError( f"StateIndex {self} does not contain all the indices in {other}" @@ -211,10 +242,6 @@ def __str__(self): @property def names(self): """Return the names of the states.""" - # analysing states from connected tasks if inner_inputs - if not hasattr(self, "keys_final"): - self.prepare_states() - self.prepare_inputs() previous_states_keys = { f"_{v.name}": v.keys_final for v in self.inner_inputs.values() } @@ -265,6 +292,41 @@ def included(s): remaining_stack = [s for s in stack if included(s)] return depth + len(remaining_stack) + def nest_output_type(self, type_: type) -> type: + """Nests a type of an output field in a combination of lists and state-arrays + based on the state's splitter and combiner + + Parameters + ---------- + type_ : type + the type of the output field + + Returns + ------- + type + the nested type of the 
output field + """ + + state_array_depth = self.depth() + + # If there is a combination, it will get flattened into a single list + if self.depth(after_combine=False) > state_array_depth: + type_ = list[type_] + + # Nest the uncombined state arrays around the type + for _ in range(state_array_depth): + type_ = StateArray[type_] + return type_ + + @classmethod + def combine_state_arrays(cls, type_: type) -> type: + """Collapses (potentially nested) state array(s) into a single list""" + if TypeParser.get_origin(type_) is StateArray: + # Implicitly combine any remaining uncombined states into a single + # list + type_ = list[TypeParser.strip_splits(type_)[0]] + return type_ + @property def splitter(self): """Get the splitter of the state.""" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index d3016405f5..9bdcaa8f97 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -6,7 +6,7 @@ import os from pathlib import Path from tempfile import mkdtemp -from copy import copy +from copy import copy, deepcopy from datetime import datetime from collections import defaultdict import attrs @@ -211,16 +211,15 @@ def __call__( from pydra.engine.specs import TaskDef state = State( - name="not-important", + name="outer_split", definition=task_def, - splitter=task_def._splitter, - combiner=task_def._combiner, + splitter=deepcopy(task_def._splitter), + combiner=deepcopy(task_def._combiner), ) - list_depth = 2 if state.depth(after_combine=False) != state.depth() else 1 def wrap_type(tp): - for _ in range(list_depth): - tp = list[tp] + tp = state.nest_output_type(tp) + tp = state.combine_state_arrays(tp) return tp output_types = { @@ -568,22 +567,27 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def get_tasks( - self, index: StateIndex = StateIndex() - ) -> "Task | StateArray[Task[DefType]]": - """Get a task object for a given state index.""" - if not self.tasks: - return StateArray([]) - task_index = next(iter(self._tasks)) if self._tasks else StateIndex() - if len(task_index) > len(index): - tasks = [] - for ind, task in self._tasks.items(): - if ind.matches(index): - tasks.append(task) - return StateArray(tasks) - elif len(index) > len(task_index): - index = index.subset(task_index) - return self._tasks[index] + def matching_jobs(self, index: StateIndex = StateIndex()) -> "StateArray[Task]": + """Get the jobs that match a given state index. 
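A small illustrative sketch of the StateIndex helpers this method relies on (the "node.field" keys below are invented):

from pydra.engine.state import StateIndex

full = StateIndex({"mult.x": 1, "mult.y": 0})  # index over two split fields
outer = StateIndex({"mult.x": 1})              # index over a subset of them

assert full.matches(outer)                     # values agree on the shared keys
assert list(full.subset(["mult.x"])) == ["mult.x"]
assert full.missing(["mult.x", "add.z"]) == ["add.z"]

# Indices with the same keys (in the same order) compare by their values, which
# is what lets upstream jobs be returned in a deterministic, sorted order.
assert StateIndex({"mult.x": 0}) < StateIndex({"mult.x": 1})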
+ + Parameters + ---------- + index : StateIndex, optional + The state index of the task to get, by default StateIndex() + """ + matching = StateArray() + if self.tasks: + task_index = next(iter(self._tasks)) if self._tasks else StateIndex() + if len(task_index) > len(index): + # Select matching tasks and return them in nested state-array objects + for ind, task in self._tasks.items(): + if ind.matches(index): + matching.append(task) + elif len(index) > len(task_index): + matching.append( + self._tasks[index.subset(task_index)] + ) # Return a single task + return matching @property def started(self) -> bool: @@ -740,11 +744,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: - pred_jobs = pred.get_tasks(index) - if isinstance(pred_jobs, StateArray): - pred_inds = [j.state_index for j in pred_jobs] - else: - pred_inds = [pred_jobs.state_index] + pred_inds = [j.state_index for j in pred.matching_jobs(index)] if not all(i in pred.successful for i in pred_inds): is_runnable = False blocked = True diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index dba7360d11..07aa09b446 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -1033,7 +1033,7 @@ def test_task_state_comb_1(plugin_dask_opt, tmp_path): assert state.splitter_final is None assert state.splitter_rpn_final == [] - with Submitter(worker=plugin_dask_opt, cache_dir=tmp_path) as sub: + with Submitter(worker="debug", cache_dir=tmp_path) as sub: results = sub(nn) assert not results.errored, "\n".join(results.errors["error message"]) @@ -1147,7 +1147,7 @@ def test_task_state_comb_2( assert state.splitter_rpn == state_rpn assert state.combiner == state_combiner - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(worker="debug", cache_dir=tmp_path) as sub: results = sub(nn) assert not results.errored, "\n".join(results.errors["error message"]) @@ -1161,18 +1161,7 @@ def test_task_state_comb_2( # it should give values of inputs that corresponds to the specific element # results_verb = nn.result(return_inputs=True) - if state.splitter_rpn_final: - for i, res in enumerate(expected): - assert results.outputs.out == res - # results_verb - # for i, res_l in enumerate(expected_val): - # for j, res in enumerate(res_l): - # assert (results_verb[i][j][0], results_verb[i][j][1].output.out) == res - # if the combiner is full expected is "a flat list" - else: - assert results.outputs.out == expected - # for i, res in enumerate(expected_val): - # assert (results_verb[i][0], results_verb[i][1].output.out) == res + assert results.outputs.out == expected def test_task_state_comb_singl_1(plugin, tmp_path): From c5da03b81ae3d7056533e85360425cace5746083 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 18:46:32 +1100 Subject: [PATCH 318/342] debugging node state operations --- pydra/engine/core.py | 3 ++- pydra/engine/node.py | 25 +++++++++++++++---------- pydra/engine/submitter.py | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index d2fbdcf769..c3912cdabb 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -544,6 +544,7 @@ def _check_for_hash_changes(self): details = "" for changed in hash_changes: field = getattr(attr.fields(type(self.definition)), changed) + hash_function(getattr(self.definition, changed)) val = getattr(self.definition, changed) field_type 
= type(val) if inspect.isclass(field.type) and issubclass(field.type, FileSet): @@ -570,7 +571,7 @@ def _check_for_hash_changes(self): if hash_changes: raise RuntimeError( f"Input field hashes have changed during the execution of the " - f"'{self.name}' {type(self).__name__}.\n\n{details}" + f"'{self.name}' task of {type(self)} type.\n\n{details}" ) logger.debug( "Input values and hashes for '%s' %s node:\n%s\n%s", diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 5d3e9b1cef..bcddc0919a 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -45,12 +45,12 @@ class Node(ty.Generic[OutputType]): init=False, default=None, eq=False, hash=False, repr=False ) _state: State | None = attrs.field(init=False, default=NOT_SET) - _cont_dim: dict[str, int] | None = attrs.field( - init=False, default=None - ) # QUESTION: should this be included in the state? - _inner_cont_dim: dict[str, int] = attrs.field( - init=False, factory=dict - ) # QUESTION: should this be included in the state? + # _cont_dim: dict[str, int] | None = attrs.field( + # init=False, default=None + # ) # QUESTION: should this be included in the state? + # _inner_cont_dim: dict[str, int] = attrs.field( + # init=False, factory=dict + # ) # QUESTION: should this be included in the state? def __attrs_post_init__(self): self._set_state() @@ -179,16 +179,20 @@ def _check_if_outputs_have_been_used(self, msg): def _set_state(self) -> None: # Add node name to state's splitter, combiner and cont_dim loaded from the def - splitter = self._definition._splitter - combiner = self._definition._combiner + splitter = deepcopy( + self._definition._splitter + ) # these can be modified by the state + combiner = deepcopy( + self._definition._combiner + ) # these can be modified by the state if splitter: splitter = hlpst.add_name_splitter(splitter, self.name) if combiner: combiner = hlpst.add_name_combiner(combiner, self.name) if self._definition._cont_dim: - self._cont_dim = {} + cont_dim = {} for key, val in self._definition._cont_dim.items(): - self._cont_dim[f"{self.name}.{key}"] = val + cont_dim[f"{self.name}.{key}"] = val other_states = self._get_upstream_states() if splitter or combiner or other_states: self._state = State( @@ -197,6 +201,7 @@ def _set_state(self) -> None: splitter=splitter, other_states=other_states, combiner=combiner, + cont_dim=cont_dim, ) if combiner: if not_split := [ diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 9bdcaa8f97..d25ec2a2a4 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -215,6 +215,7 @@ def __call__( definition=task_def, splitter=deepcopy(task_def._splitter), combiner=deepcopy(task_def._combiner), + cont_dim=deepcopy(task_def._cont_dim), ) def wrap_type(tp): From 561165100daf09588250a0634c962618ad738aa1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 18:48:54 +1100 Subject: [PATCH 319/342] fixed bug in _set_state --- pydra/engine/node.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index bcddc0919a..cfb5c9ba1e 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -181,16 +181,16 @@ def _set_state(self) -> None: # Add node name to state's splitter, combiner and cont_dim loaded from the def splitter = deepcopy( self._definition._splitter - ) # these can be modified by the state + ) # these can be modified in state combiner = deepcopy( self._definition._combiner - ) # these can be modified by the state + ) # these can be modified in state + cont_dim = 
{} if splitter: splitter = hlpst.add_name_splitter(splitter, self.name) if combiner: combiner = hlpst.add_name_combiner(combiner, self.name) if self._definition._cont_dim: - cont_dim = {} for key, val in self._definition._cont_dim.items(): cont_dim[f"{self.name}.{key}"] = val other_states = self._get_upstream_states() From 7b64080c9edf120d8d894215800bb891999062b1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 21:02:45 +1100 Subject: [PATCH 320/342] implemented state depth() implementation by handling the RPN representation properly --- pydra/engine/state.py | 47 +++++++++++++++------------- pydra/engine/tests/test_node_task.py | 8 ++--- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index a5f0cba3bf..7d5a803f61 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -3,7 +3,6 @@ from copy import deepcopy import itertools from collections import OrderedDict -from operator import itemgetter from functools import reduce import typing as ty from . import helpers_state as hlpst @@ -41,7 +40,7 @@ def __init__(self, indices: dict[str, int] | None = None): if indices is None: self.indices = OrderedDict() else: - self.indices = OrderedDict(sorted(indices.items())) + self.indices = OrderedDict(indices.items()) def __len__(self) -> int: return len(self.indices) @@ -53,13 +52,12 @@ def __getitem__(self, key: str) -> int: return self.indices[key] def __lt__(self, other: "StateIndex") -> bool: - if set(self.indices) != set(other.indices): + if list(self.indices) != list(other.indices): raise ValueError( - f"StateIndex {self} does not contain the same indices as {other}" + f"StateIndex {self} does not contain the same indices in the same order " + f"as {other}: {list(self.indices)} != {list(other.indices)}" ) - return sorted(self.indices.items(), key=itemgetter(0)) < sorted( - other.indices.items(), key=itemgetter(0) - ) + return tuple(self.indices.items()) < tuple(other.indices.items()) def __repr__(self) -> str: return ( @@ -273,24 +271,29 @@ def depth(self, after_combine: bool = True) -> int: int number of splits in the state (i.e. 
linked splits only add 1) """ - depth = 0 - stack = [] - def included(s): - return s not in self.combiner if after_combine else True + # replace field names with 1 or 0 (1 if the field is included in the state) + include_rpn = [ + ( + s + if s in [".", "*"] + else (int(s not in self.combiner) if after_combine else 1) + ) + for s in self.splitter_rpn + ] - for spl in self.splitter_rpn: - if spl in [".", "*"]: - if spl == ".": - depth += int(all(included(s) for s in stack)) - else: - assert spl == "*" - depth += len([s for s in stack if included(s)]) - stack = [] + stack = [] + for opr in include_rpn: + if opr == ".": + assert len(stack) >= 2 + stack.append(stack.pop() and stack.pop()) + elif opr == "*": + assert len(stack) >= 2 + stack.append(stack.pop() + stack.pop()) else: - stack.append(spl) - remaining_stack = [s for s in stack if included(s)] - return depth + len(remaining_stack) + stack.append(opr) + assert len(stack) == 1 + return stack[0] def nest_output_type(self, type_: type) -> type: """Nests a type of an output field in a combination of lists and state-arrays diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 07aa09b446..853af6e51e 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -995,9 +995,7 @@ def test_task_state_6(plugin, tmp_path): assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - - for i, expected in enumerate([3, 2, 33, 12]): - assert results.outputs.out[i] == expected + assert results.outputs.out == [3.0, 2.0, 33.0, 12.0] def test_task_state_6a(plugin, tmp_path): @@ -1014,9 +1012,7 @@ def test_task_state_6a(plugin, tmp_path): assert not results.errored, "\n".join(results.errors["error message"]) # checking the results - - for i, expected in enumerate([3, 2, 33, 12]): - assert results.outputs.out[i] == expected + assert results.outputs.out == [3.0, 2.0, 33.0, 12.0] @pytest.mark.flaky(reruns=2) # when dask From dfa195cde064bd1b2b4a5063e0428c07e2c2d4e1 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 21:10:40 +1100 Subject: [PATCH 321/342] replaced 'after_combine=False' with before_combine=True for state.depth() method arg --- pydra/engine/lazy.py | 2 +- pydra/engine/state.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index ecbaa21111..035a026d55 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -193,7 +193,7 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any: val = self._apply_cast(val) return val - if not self._node.state.depth(after_combine=False): + if not self._node.state.depth(before_combine=True): assert len(jobs) == 1 return retrieve_from_job(jobs[0]) elif not self._node.state.keys_final: # all states are combined over diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 7d5a803f61..e6db0acc76 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -256,13 +256,13 @@ def names(self): names.append(token) return names - def depth(self, after_combine: bool = True) -> int: + def depth(self, before_combine: bool = True) -> int: """Return the number of splits of the state, i.e. 
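# Worked example (an illustrative helper mirroring the RPN evaluation above): field
# names in splitter_rpn become 1 if the split is kept or 0 if it is combined away,
# "." (scalar link) acts as a logical AND and "*" (outer product) as addition, so
# the single value left on the stack is the number of nested state dimensions.
def rpn_depth(splitter_rpn, combiner=(), after_combine=True):
    stack = []
    for token in splitter_rpn:
        if token == ".":
            a, b = stack.pop(), stack.pop()
            stack.append(1 if a and b else 0)
        elif token == "*":
            stack.append(stack.pop() + stack.pop())
        else:
            stack.append(0 if after_combine and token in combiner else 1)
    return stack[0]

# splitter ["a", ("b", "c")] has the RPN form ["a", "b", "c", ".", "*"]:
# rpn_depth(["a", "b", "c", ".", "*"])                  # 2: "a" plus the linked (b, c) pair
# rpn_depth(["a", "b", "c", ".", "*"], combiner={"a"})  # 1: the "a" axis is combined out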
the number nested state arrays to wrap around the type of lazy out fields Parameters ---------- - after_combine : :obj:`bool` + before_combine : :obj:`bool` if True, the depth is after combining the fields, otherwise it is before any combinations @@ -277,7 +277,7 @@ def depth(self, after_combine: bool = True) -> int: ( s if s in [".", "*"] - else (int(s not in self.combiner) if after_combine else 1) + else (1 if before_combine else int(s not in self.combiner)) ) for s in self.splitter_rpn ] @@ -286,7 +286,9 @@ def depth(self, after_combine: bool = True) -> int: for opr in include_rpn: if opr == ".": assert len(stack) >= 2 - stack.append(stack.pop() and stack.pop()) + opr1 = stack.pop() + opr2 = stack.pop() + stack.append(opr1 and opr2) elif opr == "*": assert len(stack) >= 2 stack.append(stack.pop() + stack.pop()) @@ -313,7 +315,7 @@ def nest_output_type(self, type_: type) -> type: state_array_depth = self.depth() # If there is a combination, it will get flattened into a single list - if self.depth(after_combine=False) > state_array_depth: + if self.depth(before_combine=True) > state_array_depth: type_ = list[type_] # Nest the uncombined state arrays around the type From 8fa2d946690c6666be62358ed73cbb19c6d76fa8 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 21:46:38 +1100 Subject: [PATCH 322/342] removed task definition attribute from state --- pydra/design/tests/test_shell.py | 3 +- pydra/engine/core.py | 24 +-- pydra/engine/lazy.py | 2 +- pydra/engine/node.py | 5 +- pydra/engine/state.py | 19 +-- pydra/engine/submitter.py | 4 +- pydra/engine/tests/test_node_task.py | 2 +- pydra/engine/tests/test_specs.py | 2 +- pydra/engine/tests/test_state.py | 209 +++++++++++---------------- 9 files changed, 98 insertions(+), 172 deletions(-) diff --git a/pydra/design/tests/test_shell.py b/pydra/design/tests/test_shell.py index 7269166ad1..7a3a1896fb 100644 --- a/pydra/design/tests/test_shell.py +++ b/pydra/design/tests/test_shell.py @@ -555,7 +555,6 @@ class Outputs(ShellOutputs): argstr="-T", default=False, requires=["long_format"], - xor=["complete_date", "date_format_str", None], ), "date_format_str": shell.arg( type=str | None, @@ -563,7 +562,6 @@ class Outputs(ShellOutputs): default=None, argstr="-D", requires=["long_format"], - xor=["date_format_str", "complete_date", None], ), }, outputs={ @@ -573,6 +571,7 @@ class Outputs(ShellOutputs): callable=list_entries, ) }, + xor=["complete_date", "date_format_str", None], name="Ls", ) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c3912cdabb..7318a6e7bb 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -225,26 +225,6 @@ def uid(self): """ return self._uid - def set_state(self, splitter, combiner=None): - """ - Set a particular state on this task. 
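# Illustrative sketch of the class-level xor group used in the Ls definition above.
# "report", the field names and flags are invented; the keyword pattern mirrors that
# test. Because None is listed in the group, leaving every member unset is allowed,
# while setting more than one raises "Mutually exclusive fields (...) are set
# together" when the task is run.
from pydra.design import shell

Report = shell.define(
    "report",
    inputs={
        "json_output": shell.arg(type=bool, default=False, argstr="--json", help="emit JSON"),
        "csv_output": shell.arg(type=bool, default=False, argstr="--csv", help="emit CSV"),
    },
    xor=["json_output", "csv_output", None],
    name="Report",
)

Report(json_output=True)                    # fine
Report()                                    # also fine, because None is in the group
Report(json_output=True, csv_output=True)   # rule violation reported when the task runs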
- - Parameters - ---------- - splitter : - TODO - combiner : - TODO - - """ - if splitter is not None: - self.state = state.State( - name=self.name, splitter=splitter, combiner=combiner - ) - else: - self.state = None - return self.state - @property def output_names(self): """Get the names of the outputs from the task's output_spec @@ -878,7 +858,9 @@ def _create_graph( node.state and f"{node.name}.{field.name}" in node.state.splitter ): - node._inner_cont_dim[f"{node.name}.{field.name}"] = 1 + node.state._inner_cont_dim[ + f"{node.name}.{field.name}" + ] = 1 # adding task_name: (task.state, [a field from the connection] if lf._node.name not in other_states: other_states[lf._node.name] = ( diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 035a026d55..457516e2b0 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -193,7 +193,7 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any: val = self._apply_cast(val) return val - if not self._node.state.depth(before_combine=True): + if not self._node.state or not self._node.state.depth(before_combine=True): assert len(jobs) == 1 return retrieve_from_job(jobs[0]) elif not self._node.state.keys_final: # all states are combined over diff --git a/pydra/engine/node.py b/pydra/engine/node.py index cfb5c9ba1e..6b3c77bd11 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -140,7 +140,7 @@ def lzout(self) -> OutputType: def cont_dim(self): # adding inner_cont_dim to the general container_dimension provided by the users cont_dim_all = deepcopy(self._cont_dim) - for k, v in self._inner_cont_dim.items(): + for k, v in self.state._inner_cont_dim.items(): cont_dim_all[k] = cont_dim_all.get(k, 1) + v return cont_dim_all @@ -197,7 +197,6 @@ def _set_state(self) -> None: if splitter or combiner or other_states: self._state = State( self.name, - self._definition, splitter=splitter, other_states=other_states, combiner=combiner, @@ -228,7 +227,7 @@ def _get_upstream_states(self) -> dict[str, tuple["State", list[str]]]: node: Node = val._node # variables that are part of inner splitters should be treated as a containers if node.state and f"{node.name}.{inpt_name}" in node.state.splitter: - node._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 + node.state._inner_cont_dim[f"{node.name}.{inpt_name}"] = 1 # adding task_name: (task.state, [a field from the connection] if node.name not in upstream_states: upstream_states[node.name] = (node.state, [inpt_name]) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index e6db0acc76..38c4acb90d 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -6,13 +6,9 @@ from functools import reduce import typing as ty from . import helpers_state as hlpst -from .helpers import ensure_list, attrs_values +from .helpers import ensure_list from pydra.utils.typing import StateArray, TypeParser -# from .specs import BaseDef -if ty.TYPE_CHECKING: - from .specs import TaskDef - # TODO: move to State op = {".": zip, "*": itertools.product} @@ -196,7 +192,6 @@ class State: def __init__( self, name, - definition: "TaskDef", splitter=None, combiner=None, cont_dim=None, @@ -219,12 +214,12 @@ def __init__( """ self.name = name - self.definition = definition self.other_states = other_states self.splitter = splitter # temporary combiner self.combiner = combiner self.cont_dim = cont_dim or {} + self._inner_cont_dim = {} self._inputs_ind = None # if other_states, the connections have to be updated if self.other_states: @@ -418,8 +413,9 @@ def inputs_ind(self): (i.e. 
inputs that are relevant for current task, can be outputs from previous nodes) """ if self._inputs_ind is None: - self.prepare_states() - self.prepare_inputs() + raise RuntimeError( + "inputs_ind is not set, please run prepare_states() on the state first" + ) return self._inputs_ind @current_splitter.setter @@ -987,11 +983,6 @@ def prepare_states( self.splitter_validation() self.combiner_validation() self.set_input_groups() - # container dimension for each input, specifies how nested the input is - if inputs is None: - inputs = { - f"{self.name}.{n}": v for n, v in attrs_values(self.definition).items() - } self.inputs = inputs if not self.cont_dim: self.cont_dim = cont_dim or {} diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index d25ec2a2a4..dc9fea38ae 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -212,7 +212,6 @@ def __call__( state = State( name="outer_split", - definition=task_def, splitter=deepcopy(task_def._splitter), combiner=deepcopy(task_def._combiner), cont_dim=deepcopy(task_def._cont_dim), @@ -697,6 +696,9 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: if not self.node.state: return {None: self.node._definition} split_defs = {} + if self.state._inputs_ind is None: + self.state.prepare_states(self.node.inputs) + self.state.prepare_inputs() for input_ind in self.node.state.inputs_ind: resolved = {} for inp in set(self.node.input_names): diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 853af6e51e..32aa200bfb 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -48,7 +48,7 @@ def get_state(task: TaskDef, name="NA") -> State: wf.add(task, name=name) node = wf[name] if node.state: - node.state.prepare_states() + node.state.prepare_states(inputs=node.inputs) node.state.prepare_inputs() return node.state diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index f607b1a67e..4c95d2f66e 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -40,7 +40,7 @@ def wf(workflow_task: WorkflowDef) -> Workflow: wf = Workflow.construct(workflow_task) for n in wf.nodes: if n._state: - n._state.prepare_states() + n._state.prepare_states(inputs=n.inputs) n._state.prepare_inputs() return wf diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index 47a695adf1..104bd0f549 100644 --- a/pydra/engine/tests/test_state.py +++ b/pydra/engine/tests/test_state.py @@ -80,7 +80,7 @@ def test_state_1( inputs, splitter, ndim, states_ind, states_val, group_for_inputs, groups_stack ): """single state: testing groups, prepare_states and prepare_inputs""" - st = State(definition=example_def, name="NA", splitter=splitter) + st = State(name="NA", splitter=splitter) assert st.splitter == st.current_splitter assert st.splitter_rpn == st.current_splitter_rpn assert st.prev_state_splitter is None @@ -99,27 +99,25 @@ def test_state_1( def test_state_2_err(): with pytest.raises(PydraStateError) as exinfo: - State(definition=example_def, name="NA", splitter={"a"}) + State(name="NA", splitter={"a"}) assert "splitter has to be a string, a tuple or a list" == str(exinfo.value) def test_state_3_err(): with pytest.raises(PydraStateError) as exinfo: - State( - definition=example_def, name="NA", splitter=["a", "b"], combiner=("a", "b") - ) + State(name="NA", splitter=["a", "b"], combiner=("a", "b")) assert "combiner has to be a string or a list" == str(exinfo.value) def 
test_state_4_err(): - st = State(definition=example_def, name="NA", splitter="a", combiner=["a", "b"]) + st = State(name="NA", splitter="a", combiner=["a", "b"]) with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "all combiners have to be in the splitter" in str(exinfo.value) def test_state_5_err(): - st = State(definition=example_def, name="NA", combiner="a") + st = State(name="NA", combiner="a") with pytest.raises(PydraStateError) as exinfo: st.combiner_validation() assert "splitter has to be set before" in str(exinfo.value) @@ -337,7 +335,7 @@ def test_state_6(splitter, cont_dim, values, keys, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(definition=example_def, splitter=splitter, name="S") + st = State(splitter=splitter, name="S") st.prepare_states(inputs=inputs, cont_dim=cont_dim) # checking keys and splits @@ -373,7 +371,7 @@ def test_state_7(splitter, cont_dim, inputs, mismatch): cont_dim = {f"S.{k}": v for k, v in cont_dim.items()} inputs = {f"S.{k}": v for k, v in inputs.items()} - st = State(definition=example_def, splitter=splitter, name="S") + st = State(splitter=splitter, name="S") if mismatch: with pytest.raises(ValueError): @@ -423,7 +421,7 @@ def test_state_8(splitter, cont_dim, values, keys, shapes, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(definition=example_def, splitter=splitter, name="S") + st = State(splitter=splitter, name="S") st.prepare_states(inputs=inputs, cont_dim=cont_dim) # checking keys and splits @@ -464,7 +462,7 @@ def test_state_9(splitter, values, keys, splits): keys = [f"S.{k}" for k in keys] splits = [{f"S.{k}": v for k, v in el.items()} for el in splits] - st = State(definition=example_def, splitter=splitter, name="S") + st = State(splitter=splitter, name="S") st.prepare_states(inputs=inputs) # checking keys and splits @@ -480,8 +478,8 @@ def test_state_connect_1(): """two 'connected' states: testing groups, prepare_states and prepare_inputs no explicit splitter for the second state """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", other_states={"NA": (st1, "b")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] assert st2.prev_state_splitter == st2.splitter @@ -503,9 +501,8 @@ def test_state_connect_1a(): """two 'connected' states: testing groups, prepare_states and prepare_inputs the second state has explicit splitter from the first one (the prev-state part) """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter="_NA", other_states={"NA": (st1, "b")}, @@ -525,8 +522,8 @@ def test_state_connect_1a(): def test_state_connect_1b_exception(): """can't provide explicitly NA.a (should be _NA)""" - State(definition=example_def, name="NA", splitter="a", other_states={}) - st2 = State(definition=example_def, name="NB", splitter="NA.a") + State(name="NA", splitter="a", other_states={}) + st2 = State(name="NB", splitter="NA.a") with pytest.raises(PydraStateError) as excinfo: st2.splitter_validation() assert "consider using _NA" in str(excinfo.value) @@ -537,7 +534,6 @@ def test_state_connect_1c_exception(splitter2, other_states2): """can't ask for splitter from node that is not connected""" with 
pytest.raises(PydraStateError): st2 = State( - definition=example_def, name="NB", splitter=splitter2, other_states=other_states2, @@ -550,9 +546,8 @@ def test_state_connect_2(): the second state has explicit splitter that contains splitter from the first node and a new field (the prev-state and current part) """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}, @@ -599,9 +594,8 @@ def test_state_connect_2a(): splitter from the first node and a new field; adding an additional scalar field that is not part of the splitter """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["_NA", "a"], other_states={"NA": (st1, "b")}, @@ -642,10 +636,8 @@ def test_state_connect_2b(): the second state has explicit splitter with a new field (the current part) splitter from the first node (the prev-state part) has to be added """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State( - definition=example_def, name="NB", splitter="a", other_states={"NA": (st1, "b")} - ) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.a"] assert st2.splitter_rpn == ["NA.a", "NB.a", "*"] @@ -682,10 +674,9 @@ def test_state_connect_3(): the third state connected to two previous states; splitter from the previous states (the prev-state part) has to be added """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", other_states={"NA": (st1, "b"), "NB": (st2, "c")}, ) @@ -728,10 +719,9 @@ def test_state_connect_3a(): the third state connected to two previous states; the third state has explicit splitter that contains splitters from previous states """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter=["_NA", "_NB"], other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -771,10 +761,9 @@ def test_state_connect_3b(): the third state has explicit splitter that contains splitter only from the first state. splitter from the second state has to be added (partial prev-state part) """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter="_NB", other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -813,10 +802,9 @@ def test_state_connect_4(): the third state connected to two previous states; the third state has explicit scalar(!) 
splitter that contains two previous states """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter=("_NA", "_NB"), other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -845,8 +833,8 @@ def test_state_connect_5(): the first state has outer splitter, the second state has no explicit splitter """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) - st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "a")}) + st1 = State(name="NA", splitter=["a", "b"]) + st2 = State(name="NB", other_states={"NA": (st1, "a")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -875,10 +863,9 @@ def test_state_connect_6(): the first state has outer splitter, the third state has explicit splitter with splitters from previous states """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter=["a", "b"]) + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter=["_NA", "_NB"], other_states={"NA": (st1, "a"), "NB": (st2, "b")}, @@ -929,10 +916,9 @@ def test_state_connect_6a(): the first state has outer splitter, the third state has no explicit splitter """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter=["a", "b"]) + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", other_states={"NA": (st1, "a"), "NB": (st2, "b")}, ) @@ -980,10 +966,8 @@ def test_state_connect_7(): """two 'connected' states with multiple fields that are connected no explicit splitter for the second state """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State( - definition=example_def, name="NB", other_states={"NA": (st1, ["x", "y"])} - ) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", other_states={"NA": (st1, ["x", "y"])}) # should take into account that x, y come from the same task assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -1008,10 +992,9 @@ def test_state_connect_8(): pydra should recognize, that there is only one splitter - NA and it should give the same as the previous test """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", other_states={"NA": (st1, "b")}) st3 = State( - definition=example_def, name="NC", other_states={"NA": (st1, "x"), "NB": (st2, "y")}, ) @@ -1043,15 +1026,13 @@ def test_state_connect_9(): pydra should recognize, that there is only one splitter - NA_1 and NA_2 """ - st1 = State(definition=example_def, name="NA_1", splitter="a") - st1a = State(definition=example_def, name="NA_2", splitter="a") + st1 = State(name="NA_1", splitter="a") + st1a = State(name="NA_2", splitter="a") st2 = State( - definition=example_def, name="NB", other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}, ) st3 = State( - definition=example_def, name="NC", other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}, ) @@ -1086,9 +1067,8 @@ def test_state_connect_innerspl_1(): """two 'connected' states: testing groups, prepare_states and prepare_inputs, the 
second state has an inner splitter, full splitter provided """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["_NA", "b"], other_states={"NA": (st1, "b")}, @@ -1143,10 +1123,8 @@ def test_state_connect_innerspl_1a(): the second state has an inner splitter, splitter from the first state (the prev-state part) has to be added """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State( - definition=example_def, name="NB", splitter="b", other_states={"NA": (st1, "b")} - ) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="b", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.b"] assert st2.splitter_rpn == ["NA.a", "NB.b", "*"] @@ -1196,9 +1174,8 @@ def test_state_connect_innerspl_1a(): def test_state_connect_innerspl_1b(): """incorrect splitter - the current & prev-state parts in scalar splitter""" with pytest.raises(PydraStateError): - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") State( - definition=example_def, name="NB", splitter=("_NA", "b"), other_states={"NA": (st1, "b")}, @@ -1210,9 +1187,8 @@ def test_state_connect_innerspl_2(): the second state has one inner splitter and one 'normal' splitter only the current part of the splitter provided (the prev-state has to be added) """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}, @@ -1286,9 +1262,8 @@ def test_state_connect_innerspl_2a(): only the current part of the splitter provided (different order!), """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["b", "c"], other_states={"NA": (st1, "b")}, @@ -1358,16 +1333,13 @@ def test_state_connect_innerspl_3(): the prev-state parts of the splitter have to be added """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}, ) - st3 = State( - definition=example_def, name="NC", splitter="d", other_states={"NB": (st2, "a")} - ) + st3 = State(name="NC", splitter="d", other_states={"NB": (st2, "a")}) assert st3.splitter == ["_NB", "NC.d"] assert st3.splitter_rpn == ["NA.a", "NB.c", "NB.b", "*", "*", "NC.d", "*"] @@ -1504,10 +1476,9 @@ def test_state_connect_innerspl_4(): """three'connected' states: testing groups, prepare_states and prepare_inputs, the third one connected to two previous, only the current part of splitter provided """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter=["b", "c"]) + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter=["b", "c"]) st3 = State( - definition=example_def, name="NC", splitter="d", other_states={"NA": (st1, "e"), "NB": (st2, "f")}, @@ -1592,7 +1563,7 @@ def test_state_connect_innerspl_4(): def test_state_combine_1(): """single state with splitter and combiner""" - st = State(definition=example_def, name="NA", splitter="a", combiner="a") + st = State(name="NA", splitter="a", combiner="a") assert st.splitter == "NA.a" assert st.splitter_rpn == ["NA.a"] assert st.current_combiner == st.current_combiner_all == st.combiner == ["NA.a"] 
@@ -1612,8 +1583,8 @@ def test_state_combine_1(): def test_state_connect_combine_1(): """two connected states; outer splitter and combiner in the first one""" - st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") - st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "c")}) + st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st2 = State(name="NB", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1657,10 +1628,8 @@ def test_state_connect_combine_2(): two connected states; outer splitter and combiner in the first one; additional splitter in the second node """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") - st2 = State( - definition=example_def, name="NB", splitter="d", other_states={"NA": (st1, "c")} - ) + st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st2 = State(name="NB", splitter="d", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] assert st1.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -1721,9 +1690,8 @@ def test_state_connect_combine_3(): two connected states; outer splitter and combiner in the first one; additional splitter in the second node """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"], combiner="a") + st1 = State(name="NA", splitter=["a", "b"], combiner="a") st2 = State( - definition=example_def, name="NB", splitter="d", combiner="d", @@ -1792,9 +1760,8 @@ def test_state_connect_combine_3(): def test_state_connect_innerspl_combine_1(): """one previous node and one inner splitter (and inner splitter combiner); only current part provided - the prev-state part had to be added""" - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["c", "b"], combiner=["b"], @@ -1877,9 +1844,8 @@ def test_state_connect_innerspl_combine_2(): only the current part of the splitter provided, the prev-state part has to be added """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", splitter=["c", "b"], combiner=["c"], @@ -1957,9 +1923,8 @@ def test_state_connect_combine_prevst_1(): the second has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(definition=example_def, name="NA", splitter="a") + st1 = State(name="NA", splitter="a") st2 = State( - definition=example_def, name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a", @@ -1992,9 +1957,8 @@ def test_state_connect_combine_prevst_2(): the second has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st1 = State(name="NA", splitter=["a", "b"]) st2 = State( - definition=example_def, name="NB", other_states={"NA": (st1, "b")}, combiner="NA.a", @@ -2033,10 +1997,9 @@ def test_state_connect_combine_prevst_3(): the third one has combiner from the first state (i.e. 
from the prev-state part of the splitter), """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) - st2 = State(definition=example_def, name="NB", other_states={"NA": (st1, "b")}) + st1 = State(name="NA", splitter=["a", "b"]) + st2 = State(name="NB", other_states={"NA": (st1, "b")}) st3 = State( - definition=example_def, name="NC", other_states={"NB": (st2, "c")}, combiner="NA.a", @@ -2074,10 +2037,9 @@ def test_state_connect_combine_prevst_4(): the third state has only the prev-state part of splitter, the third state has also combiner from the prev-state part """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter=["_NA", "_NB"], combiner=["NA.a"], @@ -2127,10 +2089,9 @@ def test_state_connect_combine_prevst_5(): the third state has scalar splitter in the prev-state part, the third state has also combiner from the prev-state part """ - st1 = State(definition=example_def, name="NA", splitter="a") - st2 = State(definition=example_def, name="NB", splitter="a") + st1 = State(name="NA", splitter="a") + st2 = State(name="NB", splitter="a") st3 = State( - definition=example_def, name="NC", splitter=("_NA", "_NB"), combiner=["NA.a"], @@ -2162,9 +2123,8 @@ def test_state_connect_combine_prevst_6(): the second also has combiner from the first state (i.e. from the prev-state part of the splitter), """ - st1 = State(definition=example_def, name="NA", splitter=["a", "b"]) + st1 = State(name="NA", splitter=["a", "b"]) st2 = State( - definition=example_def, name="NB", splitter="c", other_states={"NA": (st1, "b")}, @@ -2242,21 +2202,21 @@ def function(self): [ ( None, - {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + {"NA": (State(name="NA", splitter="a"), "b")}, "_NA", "_NA", None, ), ( "b", - {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + {"NA": (State(name="NA", splitter="a"), "b")}, ["_NA", "CN.b"], "_NA", "CN.b", ), ( ("b", "c"), - {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + {"NA": (State(name="NA", splitter="a"), "b")}, ["_NA", ("CN.b", "CN.c")], "_NA", ("CN.b", "CN.c"), @@ -2264,8 +2224,8 @@ def function(self): ( None, { - "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), - "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), + "NA": (State(name="NA", splitter="a"), "a"), + "NB": (State(name="NB", splitter="a"), "b"), }, ["_NA", "_NB"], ["_NA", "_NB"], @@ -2274,8 +2234,8 @@ def function(self): ( "b", { - "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), - "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), + "NA": (State(name="NA", splitter="a"), "a"), + "NB": (State(name="NB", splitter="a"), "b"), }, [["_NA", "_NB"], "CN.b"], ["_NA", "_NB"], @@ -2284,8 +2244,8 @@ def function(self): ( ["_NA", "b"], { - "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), - "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), + "NA": (State(name="NA", splitter="a"), "a"), + "NB": (State(name="NB", splitter="a"), "b"), }, [["_NB", "_NA"], "CN.b"], ["_NB", "_NA"], @@ -2296,9 +2256,7 @@ def function(self): def test_connect_splitters( splitter, other_states, expected_splitter, expected_prevst, expected_current ): - st = State( - definition=example_def, name="CN", splitter=splitter, other_states=other_states - ) + 
st = State(name="CN", splitter=splitter, other_states=other_states) st.set_input_groups() assert st.splitter == expected_splitter assert st.prev_state_splitter == expected_prevst @@ -2310,17 +2268,17 @@ def test_connect_splitters( [ ( ("_NA", "b"), - {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + {"NA": (State(name="NA", splitter="a"), "b")}, ), ( ["b", "_NA"], - {"NA": (State(definition=example_def, name="NA", splitter="a"), "b")}, + {"NA": (State(name="NA", splitter="a"), "b")}, ), ( ["_NB", ["_NA", "b"]], { - "NA": (State(definition=example_def, name="NA", splitter="a"), "a"), - "NB": (State(definition=example_def, name="NB", splitter="a"), "b"), + "NA": (State(name="NA", splitter="a"), "a"), + "NB": (State(name="NB", splitter="a"), "b"), }, ), ], @@ -2328,7 +2286,6 @@ def test_connect_splitters( def test_connect_splitters_exception_1(splitter, other_states): with pytest.raises(PydraStateError) as excinfo: State( - definition=example_def, name="CN", splitter=splitter, other_states=other_states, @@ -2339,12 +2296,9 @@ def test_connect_splitters_exception_1(splitter, other_states): def test_connect_splitters_exception_2(): with pytest.raises(PydraStateError) as excinfo: st = State( - definition=example_def, name="CN", splitter="_NB", - other_states={ - "NA": (State(definition=example_def, name="NA", splitter="a"), "b") - }, + other_states={"NA": (State(name="NA", splitter="a"), "b")}, ) st.set_input_groups() assert "can't ask for splitter from NB" in str(excinfo.value) @@ -2353,12 +2307,11 @@ def test_connect_splitters_exception_2(): def test_connect_splitters_exception_3(): with pytest.raises(PydraStateError) as excinfo: State( - definition=example_def, name="CN", splitter="_NB", other_states=[ "NA", - (State(definition=example_def, name="NA", splitter="a"), "b"), + (State(name="NA", splitter="a"), "b"), ], ) assert "other states has to be a dictionary" == str(excinfo.value) From 0b01701ce7a906fa3d79f5992ecb9d185e289ca3 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Fri, 7 Mar 2025 21:47:23 +1100 Subject: [PATCH 323/342] cleaned up comment --- pydra/engine/state.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 38c4acb90d..490a9fc3cb 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -10,7 +10,6 @@ from pydra.utils.typing import StateArray, TypeParser -# TODO: move to State op = {".": zip, "*": itertools.product} From 1455c2ef0283a911840d3bd6793fe056d1cf1b89 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 8 Mar 2025 06:11:35 +1100 Subject: [PATCH 324/342] debugging issues with nested output types --- pydra/engine/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 490a9fc3cb..dacd7cda69 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -35,7 +35,7 @@ def __init__(self, indices: dict[str, int] | None = None): if indices is None: self.indices = OrderedDict() else: - self.indices = OrderedDict(indices.items()) + self.indices = OrderedDict(sorted(indices.items())) def __len__(self) -> int: return len(self.indices) @@ -250,7 +250,7 @@ def names(self): names.append(token) return names - def depth(self, before_combine: bool = True) -> int: + def depth(self, before_combine: bool = False) -> int: """Return the number of splits of the state, i.e. 
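# Minimal usage sketch of the trimmed-down State API after this series: State no
# longer receives the task definition; callers pass a dict of node-scoped input
# values to prepare_states() (and then prepare_inputs()) before reading inputs_ind.
# The node name "NA" and fields a/b are illustrative.
from pydra.engine.state import State

st = State(name="NA", splitter=["a", "b"], combiner="a")
st.prepare_states(inputs={"NA.a": [1, 2], "NA.b": ["x", "y"]})
st.prepare_inputs()
st.inputs_ind   # populated now; accessing it before prepare_states() raises RuntimeError
st.depth()      # 1, since the combined "a" axis no longer counts (2 with before_combine=True)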
the number nested state arrays to wrap around the type of lazy out fields From 6980cf0f2ec276b44390a7ae0c8104387a8b816c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 8 Mar 2025 06:29:11 +1100 Subject: [PATCH 325/342] debugging removal of definition from state --- pydra/engine/node.py | 6 ++++++ pydra/engine/state.py | 1 + pydra/engine/submitter.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 6b3c77bd11..8b0947343b 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -69,6 +69,12 @@ def __init__(self, node: "Node") -> None: def __getattr__(self, name: str) -> ty.Any: return getattr(self._node._definition, name) + def __getstate__(self) -> ty.Dict[str, ty.Any]: + return {"_node": self._node} + + def __setstate__(self, state: ty.Dict[str, ty.Any]) -> None: + super().__setattr__("_node", state["_node"]) + def __setattr__(self, name: str, value: ty.Any) -> None: setattr(self._node._definition, name, value) if is_lazy(value): diff --git a/pydra/engine/state.py b/pydra/engine/state.py index dacd7cda69..9556aeff74 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -978,6 +978,7 @@ def prepare_states( State Values specific elements from inputs that can be used running interfaces """ + assert isinstance(inputs, dict) # checking if splitter and combiner have valid forms self.splitter_validation() self.combiner_validation() diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index dc9fea38ae..7db346e1ac 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -697,7 +697,7 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: return {None: self.node._definition} split_defs = {} if self.state._inputs_ind is None: - self.state.prepare_states(self.node.inputs) + self.state.prepare_states(attrs_values(self.node._definition)) self.state.prepare_inputs() for input_ind in self.node.state.inputs_ind: resolved = {} From a1e32683d6b3624427474d969bd4f5ae81def187 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Sat, 8 Mar 2025 10:23:16 +1100 Subject: [PATCH 326/342] debugging test_shelltask --- pydra/design/shell.py | 19 ++- pydra/engine/core.py | 4 +- pydra/engine/environments.py | 29 ++-- pydra/engine/helpers_file.py | 4 +- pydra/engine/lazy.py | 6 +- pydra/engine/node.py | 14 ++ pydra/engine/specs.py | 36 ++--- pydra/engine/state.py | 4 +- pydra/engine/submitter.py | 29 ++-- pydra/engine/tests/test_dockertask.py | 30 ++-- pydra/engine/tests/test_helpers.py | 6 +- pydra/engine/tests/test_node_task.py | 7 +- pydra/engine/tests/test_shelltask.py | 215 ++++++++------------------ pydra/engine/tests/test_specs.py | 2 +- 14 files changed, 170 insertions(+), 235 deletions(-) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 4a53208e23..1cc83d2b15 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -164,11 +164,16 @@ def _callable_validator(self, _, value): if value: if not callable(value): raise ValueError(f"callable must be a function, not {value!r}") - elif not getattr(self, "path_template", None) and self.name not in [ - "return_code", - "stdout", - "stderr", - ]: # ShellOutputs.BASE_NAMES + elif ( + self.default is NO_DEFAULT + and not getattr(self, "path_template", None) + and self.name + not in [ + "return_code", + "stdout", + "stderr", + ] + ): # ShellOutputs.BASE_NAMES raise ValueError( "A shell output field must have either a callable or a path_template" ) @@ -239,8 +244,8 @@ def _validate_path_template(self, attribute, value): 
if value: if self.default not in (NO_DEFAULT, True, None): raise ValueError( - f"path_template ({value!r}) can only be provided when no default " - f"({self.default!r}) is provided" + f"path_template ({value!r}) can only be provided when there is no " + f"default value provided ({self.default!r})" ) if not (is_fileset_or_union(self.type) or self.type is ty.Any): raise ValueError( diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 7318a6e7bb..c5f7f32f5f 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -274,9 +274,7 @@ def inputs(self) -> dict[str, ty.Any]: for fld in list_fields(self.definition): name = fld.name value = self._inputs[name] - if value is not attr.NOTHING and TypeParser.contains_type( - FileSet, fld.type - ): + if value and TypeParser.contains_type(FileSet, fld.type): copied_value = copy_nested_files( value=value, dest_dir=self.output_dir, diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 02efd902e4..9701e6f7ef 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -50,10 +50,11 @@ class Native(Environment): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] - values = execute(task.definition._command_args()) + cmd_args = task.definition._command_args(values=task.inputs) + values = execute(cmd_args) output = dict(zip(keys, values)) if output["return_code"]: - msg = f"Error running '{task.name}' task with {task.definition._command_args()}:" + msg = f"Error running '{task.name}' task with {cmd_args}:" if output["stderr"]: msg += "\n\nstderr:\n" + output["stderr"] if output["stdout"]: @@ -113,7 +114,7 @@ def get_bindings( from pydra.design import shell bindings: dict[str, tuple[str, str]] = {} - input_updates: dict[str, tuple[Path, ...]] = {} + value_updates: dict[str, tuple[Path, ...]] = {} if root is None: return bindings fld: shell.arg @@ -142,7 +143,7 @@ def get_bindings( # that path relative to the location in the mount point in the container. # If it is a more complex file-set with multiple paths, then it is converted # into a tuple of paths relative to the base of the fileset. 
- input_updates[fld.name] = ( + value_updates[fld.name] = ( env_path / fileset.name if isinstance(fileset, os.PathLike) else tuple(env_path / rel for rel in fileset.relative_fspaths) @@ -151,7 +152,11 @@ def get_bindings( # Add the cache directory to the list of mounts bindings[task.cache_dir] = (f"{self.root}/{task.cache_dir}", "rw") - return bindings, input_updates + # Update values with the new paths + values = copy(task.inputs) + values.update(value_updates) + + return bindings, values class Docker(Container): @@ -160,7 +165,7 @@ class Docker(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: docker_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self.get_bindings(task=task, root=self.root) + mounts, values = self.get_bindings(task=task, root=self.root) docker_args = [ "docker", @@ -176,11 +181,7 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: keys = ["return_code", "stdout", "stderr"] values = execute( - docker_args - + [docker_img] - + task.definition._command_args( - root=self.root, value_updates=input_updates - ), + docker_args + [docker_img] + task.definition._command_args(values=values), ) output = dict(zip(keys, values)) if output["return_code"]: @@ -197,7 +198,7 @@ class Singularity(Container): def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: singularity_img = f"{self.image}:{self.tag}" # mounting all input locations - mounts, input_updates = self.get_bindings(task=task, root=self.root) + mounts, values = self.get_bindings(task=task, root=self.root) # todo adding xargsy etc singularity_args = [ @@ -216,9 +217,7 @@ def execute(self, task: "Task[ShellDef]") -> dict[str, ty.Any]: values = execute( singularity_args + [singularity_img] - + task.definition._command_args( - root=self.root, value_updates=input_updates - ), + + task.definition._command_args(values=values), ) output = dict(zip(keys, values)) if output["return_code"]: diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 8234975c80..736a6e2729 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -112,7 +112,9 @@ def copy_fileset(fileset: FileSet): # not sure if this might be useful for Function Task def template_update( - definition, output_dir: Path, map_copyfiles: dict[str, Path] | None = None + definition, + output_dir: Path | None = None, + map_copyfiles: dict[str, Path] | None = None, ): """ Update all templates that are present in the input definition. 
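# Shape sketch (illustrative paths, assuming the default container root /mnt/pydra)
# of what Container.get_bindings() now returns and how Docker/Singularity consume
# it: host directories map to (container_path, mode) mounts, and file-valued inputs
# are rewritten to their in-container locations so that _command_args(values=...)
# renders container-side paths.
from pathlib import Path

bindings = {
    Path("/home/me/cache/task123"): ("/mnt/pydra/home/me/cache/task123", "rw"),
}
values = {"in_file": Path("/mnt/pydra/data/in_file.nii")}
# Docker.execute() then boils down to:
#   execute(docker_args + [image] + task.definition._command_args(values=values))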
diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 457516e2b0..0739a67cd2 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -1,6 +1,5 @@ import typing as ty import abc -from operator import attrgetter import attrs from pydra.utils.typing import StateArray from pydra.utils.hash import hash_single @@ -158,10 +157,7 @@ def _get_value( if state_index is None: state_index = StateIndex() - jobs = sorted( - graph.node(self._node.name).matching_jobs(state_index), - key=attrgetter("state_index"), - ) + jobs = graph.node(self._node.name).matching_jobs(state_index) def retrieve_from_job(job: "Task[DefType]") -> ty.Any: if job.errored: diff --git a/pydra/engine/node.py b/pydra/engine/node.py index 8b0947343b..a5061fb089 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -108,6 +108,20 @@ def state(self): def input_values(self) -> tuple[tuple[str, ty.Any]]: return tuple(attrs_values(self._definition).items()) + @property + def state_values(self) -> dict[str, ty.Any]: + """Get the values of the task definition, scoped by the name of the node to be + used in the state + + Returns + ------- + dict[str, Any] + The values of the task definition + """ + return { + f"{self.name}.{n}": v for n, v in attrs_values(self._definition).items() + } + @property def lzout(self) -> OutputType: from pydra.engine.helpers import list_fields diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index c258f9735e..7d48870d76 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -535,7 +535,8 @@ def _rule_violations(self) -> list[str]: are_set = [f"{n}={v!r}" for n, v in mutually_exclusive.items() if v] if len(are_set) > 1: errors.append( - f"Mutually exclusive fields ({', '.join(are_set)}) are set together" + f"Mutually exclusive fields ({', '.join(sorted(are_set))}) are set " + "together" ) elif not are_set and None not in xor_set: errors.append( @@ -915,7 +916,7 @@ def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: if default.exists(): return default else: - raise AttributeError(f"file {default} does not exist") + raise FileNotFoundError(f"file {default} does not exist") else: all_files = [Path(el) for el in glob(default.expanduser())] if len(all_files) > 1: @@ -923,7 +924,7 @@ def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: elif len(all_files) == 1: return all_files[0] else: - raise AttributeError(f"no file matches {default.name}") + raise FileNotFoundError(f"no file matches {default.name}") return default @classmethod @@ -1038,7 +1039,9 @@ def cmdline(self) -> str: """The equivalent command line that would be submitted if the task were run on the current working directory.""" # Skip the executable, which can be a multi-part command, e.g. 'docker run'. 
- cmd_args = self._command_args() + values = attrs_values(self) + values.update(template_update(self)) + cmd_args = self._command_args(values=values) cmdline = cmd_args[0] for arg in cmd_args[1:]: # If there are spaces in the arg, and it is not enclosed by matching @@ -1050,29 +1053,16 @@ def cmdline(self) -> str: cmdline += " " + arg return cmdline - def _command_args( - self, - output_dir: Path | None = None, - value_updates: dict[str, ty.Any] | None = None, - root: Path | None = None, - ) -> list[str]: + def _command_args(self, values: dict[str, ty.Any]) -> list[str]: """Get command line arguments""" - if output_dir is None: - output_dir = Path.cwd() self._check_resolved() self._check_rules() - values = attrs_values(self) - template_values = template_update(self, output_dir=output_dir) - values.update(template_values) - if value_updates: - values.update(value_updates) # Drop none/empty values and optional path fields that are set to false for field in list_fields(self): fld_value = values[field.name] if fld_value is None or (is_multi_input(field.type) and fld_value == []): del values[field.name] if is_fileset_or_union(field.type) and type(fld_value) is bool: - assert field.path_template and field.name not in template_values del values[field.name] # Drop special fields that are added separately del values["executable"] @@ -1087,8 +1077,6 @@ def _command_args( pos_val = self._command_pos_args( field=fields[field_name], values=values, - root=root, - output_dir=output_dir, positions_provided=positions_provided, ) if pos_val: @@ -1123,9 +1111,7 @@ def _command_pos_args( self, field: shell.arg, values: dict[str, ty.Any], - output_dir: Path, positions_provided: list[str], - root: Path | None = None, ) -> tuple[int, ty.Any]: """ Checking all additional input fields, setting pos to None, if position not set. 
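# Sketch of the calling convention introduced by this refactor: the caller resolves
# a plain {field: value} mapping first (templated outputs, container-remapped paths)
# and _command_args() only renders it. The "echo" task is illustrative, and the
# import location of template_update is assumed from the helpers_file diff above.
from pydra.design import shell
from pydra.engine.helpers import attrs_values
from pydra.engine.helpers_file import template_update

Echo = shell.define("echo")
shelly = Echo(additional_args=["hi"])
values = attrs_values(shelly)               # plain {field: value} mapping
values.update(template_update(shelly))      # resolve any path_template outputs
cmd = shelly._command_args(values=values)   # roughly ["echo", "hi"], what .cmdline joins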
@@ -1153,9 +1139,6 @@ def _command_pos_args( positions_provided.append(field.position) value = values[field.name] - if value and isinstance(value, str): - if root: # values from templates - value = value.replace(str(output_dir), f"{root}{output_dir}") if field.readonly and type(value) is not bool and value is not attrs.NOTHING: raise Exception(f"{field.name} is read only, the value can't be provided") @@ -1180,8 +1163,7 @@ def _command_pos_args( else: raise AttributeError( f"arguments of the formatter function from {field.name} " - f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" + f"has to be in inputs or be field, but {argnm} is used" ) cmd_el_str = field.formatter(**call_args_val) cmd_el_str = cmd_el_str.strip().replace(" ", " ") diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 9556aeff74..7b9a2cd30e 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -9,7 +9,6 @@ from .helpers import ensure_list from pydra.utils.typing import StateArray, TypeParser - op = {".": zip, "*": itertools.product} @@ -966,7 +965,7 @@ def combiner_validation(self): def prepare_states( self, - inputs: dict[str, ty.Any] | None = None, + inputs: dict[str, ty.Any], cont_dim: dict[str, int] | None = None, ): """ @@ -978,7 +977,6 @@ def prepare_states( State Values specific elements from inputs that can be used running interfaces """ - assert isinstance(inputs, dict) # checking if splitter and combiner have valid forms self.splitter_validation() self.combiner_validation() diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 7db346e1ac..d7729d09ed 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -583,7 +583,7 @@ def matching_jobs(self, index: StateIndex = StateIndex()) -> "StateArray[Task]": for ind, task in self._tasks.items(): if ind.matches(index): matching.append(task) - elif len(index) > len(task_index): + else: matching.append( self._tasks[index.subset(task_index)] ) # Return a single task @@ -691,29 +691,34 @@ def _resolve_lazy_inputs( return attrs.evolve(task_def, **resolved) def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: - """Split the definition into the different states it will be run over""" + """Split the definition into the different states it will be run over + + Parameters + ---------- + values : dict[str, Any] + The values to use for the split + """ # TODO: doesn't work properly for more cmplicated wf (check if still an issue) if not self.node.state: return {None: self.node._definition} split_defs = {} - if self.state._inputs_ind is None: - self.state.prepare_states(attrs_values(self.node._definition)) - self.state.prepare_inputs() + self.state.prepare_states(self.node.state_values) + self.state.prepare_inputs() for input_ind in self.node.state.inputs_ind: resolved = {} - for inp in set(self.node.input_names): - value = getattr(self.node._definition, inp) + for inpt_name in set(self.node.input_names): + value = getattr(self._definition, inpt_name) if isinstance(value, LazyField): - value = resolved[inp] = value._get_value( + value = resolved[inpt_name] = value._get_value( workflow=self.workflow, graph=self.graph, state_index=StateIndex(input_ind), ) - if f"{self.node.name}.{inp}" in input_ind: - resolved[inp] = self.node.state._get_element( + if f"{self.node.name}.{inpt_name}" in input_ind: + resolved[inpt_name] = self.node.state._get_element( value=value, - field_name=inp, - ind=input_ind[f"{self.node.name}.{inp}"], + field_name=inpt_name, + 
ind=input_ind[f"{self.node.name}.{inpt_name}"], ) split_defs[StateIndex(input_ind)] = attrs.evolve( self.node._definition, **resolved diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 4c98cb2a37..3b57cb35a4 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -131,7 +131,7 @@ def test_docker_outputspec_1(plugin, tmp_path): @no_win @need_docker -def test_docker_inputspec_1(tmp_path): +def test_docker_inputspec_1(tmp_path, plugin): """a simple customized input definition for docker task""" filename = str(tmp_path / "file_pydra.txt") with open(filename, "w") as f: @@ -154,7 +154,9 @@ def test_docker_inputspec_1(tmp_path): docky = Docky(file=filename) - outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) assert outputs.stdout.strip() == "hello from pydra" @@ -186,7 +188,7 @@ def test_docker_inputspec_1a(tmp_path): docky = Docky() - outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) + outputs = docky(cache_dir=tmp_path, environment=Docker(image="busybox")) assert outputs.stdout.strip() == "hello from pydra" @@ -276,7 +278,9 @@ def test_docker_inputspec_2a_except(plugin, tmp_path): ) assert docky.file2.fspath == filename_2 - outputs = docky(environment=Docker(image="busybox")) + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @@ -319,7 +323,9 @@ def test_docker_inputspec_2a(plugin, tmp_path): docky = Docky(file2=filename_2) - outputs = docky(environment=Docker(image="busybox")) + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) assert outputs.stdout.strip() == "hello from pydra\nhave a nice one" @@ -385,7 +391,9 @@ class Outputs(ShellOutputs): docky = Docky(orig_file=str(file)) - outputs = docky(environment=Docker(image="busybox"), cache_dir=tmp_path) + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) assert outputs.stdout == "" out_file = outputs.out_file.fspath assert out_file.exists() @@ -428,7 +436,9 @@ def test_docker_inputspec_state_1(plugin, tmp_path): docky = Docky().split(file=[str(filename_1), str(filename_2)]) - outputs = docky(environment=Docker(image="busybox")) + outputs = docky( + worker=plugin, cache_dir=tmp_path, environment=Docker(image="busybox") + ) assert outputs.stdout[0].strip() == "hello from pydra" assert outputs.stdout[1].strip() == "have a nice one" @@ -463,7 +473,9 @@ def test_docker_inputspec_state_1b(plugin, tmp_path): ) docky = Docky().split(file=[str(file_1), str(file_2)]) - outputs = docky(environment=Docker(image="busybox")) + outputs = docky( + cache_dir=tmp_path, worker=plugin, environment=Docker(image="busybox") + ) assert outputs.stdout[0].strip() == "hello from pydra" assert outputs.stdout[1].strip() == "have a nice one" @@ -503,7 +515,7 @@ def Workflow(file): wf = Workflow(file=filename) - outputs = wf() + outputs = wf(cache_dir=tmp_path) assert outputs.out.strip() == "hello from pydra" diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 2e183c4e97..2b84ffc957 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -239,7 +239,11 @@ def Workflow(x, y=10): multiply = workflow.add(Multiply(x=x, y=y)) return multiply.out - task = Task(name="mult", 
definition=Workflow(x=2), submitter=Submitter(worker="cf")) + task = Task( + name="mult", + definition=Workflow(x=2), + submitter=Submitter(cache_dir=tmpdir, worker="cf"), + ) with wf_pkl.open("wb") as fp: cp.dump(task, fp) diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 32aa200bfb..dd144e1672 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -21,13 +21,12 @@ FunFileList, Op4Var, ) - -from pydra.engine.core import Task from pydra.engine.specs import TaskDef from pydra.engine.state import State from pydra.utils.typing import StateArray from pydra.engine.submitter import Submitter from pydra.engine.core import Workflow +from pydra.engine.helpers import attrs_values @workflow.define @@ -48,7 +47,7 @@ def get_state(task: TaskDef, name="NA") -> State: wf.add(task, name=name) node = wf[name] if node.state: - node.state.prepare_states(inputs=node.inputs) + node.state.prepare_states(node.state_values) node.state.prepare_inputs() return node.state @@ -990,7 +989,7 @@ def test_task_state_6(plugin, tmp_path): assert np.allclose(nn.lst, [[2, 3, 4], [1, 2, 3]]) assert state.splitter == ["NA.n", "NA.lst"] - with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + with Submitter(worker="debug", cache_dir=tmp_path) as sub: results = sub(nn) assert not results.errored, "\n".join(results.errors["error message"]) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 5afe8229f6..a07b7a3e63 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1,4 +1,4 @@ -import attr +from glob import glob import typing as ty import os import sys @@ -181,7 +181,7 @@ def test_shell_cmd_6(plugin, tmp_path): cmd_args = [["nipype"], ["pydra"]] # separate command into exec + args shelly = shell.define("shelly")().split( - executable=cmd_exec, additional_args=cmd_args + ["executable", "additional_args"], executable=cmd_exec, additional_args=cmd_args ) assert shelly.executable == ["echo", ["echo", "-n"]] @@ -192,7 +192,7 @@ def test_shell_cmd_6(plugin, tmp_path): # "echo -n nipype", # "echo -n pydra", # ] - outputs = shelly(plugin=plugin) + outputs = shelly(cache_dir=tmp_path, plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -224,7 +224,11 @@ def test_shell_cmd_7(plugin, tmp_path): # separate command into exec + args shelly = ( shell.define("shelly")() - .split(executable=cmd_exec, additional_args=cmd_args) + .split( + ["executable", "additional_args"], + executable=cmd_exec, + additional_args=cmd_args, + ) .combine("additional_args") ) @@ -233,10 +237,7 @@ def test_shell_cmd_7(plugin, tmp_path): outputs = shelly(plugin=plugin) - assert outputs.stdout[0][0] == "nipype\n" - assert outputs.stdout[0][1] == "pydra" - assert outputs.stdout[1][0] == "nipype" - assert outputs.stdout[1][1] == "pydra" + assert outputs.stdout == [["nipype\n", "pydra\n"], ["nipype", "pydra"]] # tests with workflows @@ -304,7 +305,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): cmd_exec = "echo" cmd_opt = True cmd_opt_hello = "HELLO" - cmd_args = "from pydra" + cmd_args = ["from pydra"] @shell.define class Shelly(ShellDef["Shelly.Outputs"]): @@ -428,9 +429,8 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd_exec) - with pytest.raises(Exception) as excinfo: + with pytest.raises(ValueError, match="Mandatory field 'text' is not set"): shelly(cache_dir=tmp_path) - assert "mandatory" in str(excinfo.value) 
@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -541,36 +541,14 @@ class Outputs(ShellOutputs): assert outputs.stdout == "Hi\n" -def test_shell_cmd_inputspec_4c_exception(plugin): - """mandatory field added to fields, value provided""" - cmd_exec = "echo" - - # separate command into exec + args - with pytest.raises( - Exception, match=r"default value \('Hello'\) should not be set when the field" - ): - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - executable = cmd_exec - text: str = shell.arg( - default="Hello", - position=1, - help="text", - argstr="", - ) - - class Outputs(ShellOutputs): - pass - - def test_shell_cmd_inputspec_4d_exception(plugin): """mandatory field added to fields, value provided""" cmd_exec = "echo" # separate command into exec + args with pytest.raises( - Exception, match=r"default value \('Hello'\) should not be set together" + ValueError, + match=r"path_template \('exception'\) can only be provided when there is no default", ): @shell.define @@ -603,6 +581,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="-t", ) opt_S: bool = shell.arg( + default=False, position=2, help="opt S", argstr="-S", @@ -642,9 +621,8 @@ class Outputs(ShellOutputs): pass shelly = Shelly(opt_t=cmd_t, opt_S=cmd_S) - with pytest.raises(Exception) as excinfo: + with pytest.raises(ValueError, match="Mutually exclusive fields"): shelly(cache_dir=tmp_path) - assert "is mutually exclusive" in str(excinfo.value) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -757,19 +735,21 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): using name_tamplate in metadata """ cmd = "touch" - args = ["newfile_tmp.txt"] + arg = "newfile_tmp.txt" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: str = shell.arg(argstr=None) + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - shelly = Shelly(executable=cmd, additional_args=args) + shelly = Shelly(executable=cmd, arg=arg) outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" @@ -780,39 +760,6 @@ class Outputs(ShellOutputs): assert out1.name == "newfile_tmp.txt" -@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) -def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): - """ - providing output name using input_spec, - using name_tamplate in metadata - and changing the output name for output_spec using output_field_name - """ - cmd = "touch" - args = File.mock("newfile_tmp.txt") - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - executable = cmd - - class Outputs(ShellOutputs): - out1: File = shell.outarg( - path_template="{args}", - output_field_name="out1_changed", - help="output file", - ) - - shelly = Shelly( - executable=cmd, - additional_args=args, - ) - - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert outputs.stdout == "" - # checking if the file is created in a good place - assert outputs.out1_changed.fspath.parent.parent == tmp_path - assert outputs.out1_changed.fspath.name == "newfile_tmp.txt" - - @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ @@ -850,19 +797,21 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): using name_tamplate with txt extension (extension from args should be removed """ 
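The {arg} path templates used in these tests follow a pattern that recurs through this patch: a plain string input is kept off the command line with argstr=None and is referenced only by the output's path_template. A minimal sketch of that pattern, assuming the shell.define API exercised here (the class name and cache path are illustrative placeholders):

    from fileformats.generic import File
    from pydra.design import shell
    from pydra.engine.specs import ShellDef, ShellOutputs

    @shell.define
    class Touch(ShellDef["Touch.Outputs"]):
        executable = "touch"

        # not rendered in the command line; only fills the output template
        arg: str = shell.arg(argstr=None)

        class Outputs(ShellOutputs):
            out1: File = shell.outarg(
                path_template="{arg}",
                help="output file",
            )

    outputs = Touch(arg="newfile_tmp.txt")(cache_dir="/tmp/pydra-cache")
    # outputs.out1 points at <output dir>/newfile_tmp.txt created by touch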
cmd = "touch" - args = File.mock("newfile_tmp.txt") + arg = File.mock("newfile_tmp.txt") @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg = shell.arg(argstr=None) + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}.txt", + path_template="{arg}.txt", help="output file", ) - shelly = Shelly(executable=cmd, additional_args=args) + shelly = Shelly(executable=cmd, arg=arg) outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" @@ -1169,6 +1118,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): help="list of files", ) + class Outputs(ShellOutputs): + pass + shelly = Shelly( files=files_list, ) @@ -1274,6 +1226,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) number: int = shell.arg( help="a number", + argstr=None, ) class Outputs(ShellOutputs): @@ -1324,7 +1277,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): """shelltask changes a file in place, - adding copyfile=True to the file-input from input_spec + adding copy_mode="copy" to the file-input from input_spec hardlink or copy in the output_dir should be created """ file = tmp_path / "file_pydra.txt" @@ -1337,16 +1290,14 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd orig_file: File = shell.arg( - position=1, - argstr="", help="orig file", - copyfile=True, + copy_mode="copy", ) class Outputs(ShellOutputs): - out_file: File = shell.outarg( - path_template="{orig_file}", + out_file: File = shell.out( help="output file", + callable=lambda orig_file: orig_file, ) shelly = Shelly(executable=cmd, orig_file=str(file)) @@ -1386,8 +1337,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_file: File = shell.outarg( - path_template="{orig_file}", + out_file: File = shell.out( + callable=lambda orig_file: orig_file, help="output file", ) @@ -1418,51 +1369,6 @@ class Outputs(ShellOutputs): assert "hello from pydra\n" == f.read() -@pytest.mark.xfail( - reason="not sure if we want to support input overwrite," - "if we allow for this orig_file is changing, so does checksum," - " and the results can't be found" -) -@pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) -def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): - """shelltask changes a file in place, - copyfile is None for the file-input, so original filed is changed - """ - file = tmp_path / "file_pydra.txt" - with open(file, "w") as f: - f.write("hello from pydra\n") - - cmd = ["sed", "-is", "s/hello/hi/"] - - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - executable = cmd - orig_file: File = shell.arg( - position=1, - argstr="", - help="orig file", - ) - - class Outputs(ShellOutputs): - out_file: File = shell.outarg( - path_template="{orig_file}", - help="output file", - ) - - shelly = Shelly( - executable=cmd, - orig_file=str(file), - ) - - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert outputs.stdout == "" - assert outputs.out_file.fspath.exists() - # the file is not copied, it is changed in place - assert outputs.out_file == file - with open(outputs.out_file) as f: - assert "hi from pydra\n" == f.read() - - @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) def test_shell_cmd_inputspec_state_1(plugin, 
results_function, tmp_path): """adding state to the input from input_spec""" @@ -1478,6 +1384,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly().split("text", text=hello) assert shelly.executable == cmd_exec @@ -1503,6 +1412,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): help="text", ) + class Outputs(ShellOutputs): + pass + with pytest.raises(TypeError): Shelly() @@ -1519,6 +1431,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): text: int = shell.arg(position=1, argstr="", help="text") + class Outputs(ShellOutputs): + pass + with pytest.raises(TypeError): Shelly(text="hello") @@ -1539,6 +1454,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + # separate command into exec + args shelly = Shelly().split(text=["HELLO", "hi"]) assert shelly.executable == cmd_exec @@ -1560,19 +1478,21 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: File = shell.arg(argstr=None) + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - shelly = Shelly(executable=cmd).split(args=args) + shelly = Shelly(executable=cmd).split(arg=args) outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() - assert outputs.out1[i].fspath.parent.parent == tmp_path[i] + assert outputs.out1[i].fspath.parent.parent == tmp_path @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1597,14 +1517,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): argstr="", ) + class Outputs(ShellOutputs): + pass + shelly = Shelly().split(file=[file_1, file_2]) assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) - assert outputs.stdout[0] == "hello from pydra" - assert outputs.stdout[1] == "have a nice one" + assert outputs.stdout == ["hello from pydra", "have a nice one"] @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1633,8 +1555,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) class Outputs(ShellOutputs): - out_file: File = shell.outarg( - path_template="{orig_file}", + out_file: File = shell.out( + callable=lambda orig_file: orig_file, help="output file", ) @@ -1648,7 +1570,7 @@ class Outputs(ShellOutputs): assert outputs.stdout[i] == "" assert outputs.out_file[i].fspath.exists() # the file is copied, and than it is changed in place - assert outputs.out_file[i].fspath.parent.parent == tmp_path[i] + assert outputs.out_file[i].fspath.parent.parent == tmp_path with open(outputs.out_file[i]) as f: assert f"hi {txt_l[i]}\n" == f.read() # the original file is unchanged @@ -1913,10 +1835,10 @@ def Workflow(cmd1, cmd2, args): for i in range(2): assert res.outputs.out1[i] == "" assert res.outputs.touch_file[i].fspath.exists() - assert res.outputs.touch_file[i].fspath.parent.parent == tmp_path[i] + assert res.outputs.touch_file[i].fspath.parent.parent == tmp_path assert res.outputs.out2[i] == "" assert res.outputs.cp_file[i].fspath.exists() - assert res.outputs.cp_file[i].fspath.parent.parent == tmp_path[i] + assert res.outputs.cp_file[i].fspath.parent.parent == tmp_path def test_wf_shell_cmd_ndst_1(plugin, tmp_path): @@ -1989,11 +1911,11 @@ def 
test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ - cmd = ["touch", File.mock("newfile_tmp.txt")] + cmd = ["touch", "newfile_tmp.txt"] Shelly = shell.define( cmd, outputs=[ - shell.outarg(name="newfile", type=File, path_template="newfile_tmp.txt") + shell.out(name="newfile", type=File, callable=lambda: "newfile_tmp.txt") ], ) shelly = Shelly() @@ -2037,7 +1959,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = shell.outarg(path_template="newfile_tmp_.txt") + newfile: File = shell.out(callable=lambda: "newfile_tmp_.txt") shelly = Shelly() @@ -2083,14 +2005,13 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = "newfile_*K.txt" + newfile: File = shell.out(default="newfile_*K.txt") shelly = Shelly() - with pytest.raises(Exception) as excinfo: - with Submitter(plugin=plugin) as sub: - shelly(submitter=sub) - assert "no file matches" in str(excinfo.value) + with pytest.raises(FileNotFoundError): + with Submitter(cache_dir=tmp_path, plugin=plugin) as sub: + sub(shelly) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 4c95d2f66e..3ae3756f16 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -40,7 +40,7 @@ def wf(workflow_task: WorkflowDef) -> Workflow: wf = Workflow.construct(workflow_task) for n in wf.nodes: if n._state: - n._state.prepare_states(inputs=n.inputs) + n._state.prepare_states(inputs=n.state_values) n._state.prepare_inputs() return wf From b7415631cb21838ba7342c07b93041d6cc815de4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Mar 2025 11:50:23 +1100 Subject: [PATCH 327/342] debugging test_shelltask --- pydra/design/base.py | 4 +- pydra/design/shell.py | 25 +++ pydra/engine/core.py | 10 +- pydra/engine/environments.py | 5 +- pydra/engine/helpers_file.py | 19 ++- pydra/engine/specs.py | 45 +++--- pydra/engine/tests/test_shelltask.py | 225 +++++++++++++-------------- pydra/engine/tests/utils.py | 2 + 8 files changed, 177 insertions(+), 158 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 6cafa4ae14..d75dc17850 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -8,7 +8,7 @@ from typing import Self import attrs.validators from attrs.converters import default_if_none -from fileformats.generic import File +from fileformats.generic import File, FileSet from pydra.utils.typing import TypeParser, is_optional, is_fileset_or_union, is_type from pydra.engine.helpers import ( from_list_if_single, @@ -59,6 +59,8 @@ def convert_default_value(value: ty.Any, self_: "Field") -> ty.Any: return value if self_.type is ty.Callable and isinstance(value, ty.Callable): return value + if isinstance(self_, Out) and TypeParser.contains_type(FileSet, self_.type): + return value return TypeParser[self_.type](self_.type, label=self_.name)(value) diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 1cc83d2b15..0ac67e3201 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -3,6 +3,7 @@ from __future__ import annotations import typing as ty import re +import glob from collections import defaultdict import inspect from copy import copy @@ -25,6 +26,7 @@ from pydra.utils.typing import ( is_fileset_or_union, MultiInputObj, + TypeParser, is_optional, optional_type, ) @@ -439,6 
+441,16 @@ def make( if inpt.position is None: inpt.position = position_stack.pop(0) + # Convert string default values to callables that glob the files in the cwd + for outpt in parsed_outputs.values(): + if ( + isinstance(outpt, out) + and isinstance(outpt.default, str) + and TypeParser.contains_type(generic.FileSet, outpt.type) + ): + outpt.callable = GlobCallable(outpt.default) + outpt.default = NO_DEFAULT + defn = make_task_def( ShellDef, ShellOutputs, @@ -782,3 +794,16 @@ class _InputPassThrough: def __call__(self, inputs: ShellDef) -> ty.Any: return getattr(inputs, self.name) + + +class GlobCallable: + """Callable that can be used to glob files""" + + def __init__(self, pattern: str): + self.pattern = pattern + + def __call__(self) -> generic.FileSet: + matches = glob.glob(self.pattern) + if not matches: + raise FileNotFoundError(f"No files found matching pattern: {self.pattern}") + return matches diff --git a/pydra/engine/core.py b/pydra/engine/core.py index c5f7f32f5f..6b0d50bbda 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -227,15 +227,9 @@ def uid(self): @property def output_names(self): - """Get the names of the outputs from the task's output_spec - (not everything has to be generated, see _generated_output_names). - """ + """Get the names of the outputs from the task's output_spec""" return [f.name for f in attr.fields(self.definition.Outputs)] - @property - def _generated_output_names(self): - return self.output_names - @property def can_resume(self): """Whether the task accepts checkpoint-restart.""" @@ -286,7 +280,7 @@ def inputs(self) -> dict[str, ty.Any]: map_copyfiles[name] = copied_value self._inputs.update( template_update( - self.definition, self.output_dir, map_copyfiles=map_copyfiles + self.definition, output_dir=self.output_dir, map_copyfiles=map_copyfiles ) ) return self._inputs diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 9701e6f7ef..03ea952efc 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -1,5 +1,6 @@ import typing as ty import os +from copy import copy from .helpers import execute from pathlib import Path from fileformats.generic import FileSet @@ -128,14 +129,14 @@ def get_bindings( f"No support for generating bindings for {type(fileset)} types " f"({fileset})" ) - copy = fld.copy_mode == FileSet.CopyMode.copy + copy_file = fld.copy_mode == FileSet.CopyMode.copy host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") # Default to mounting paths as read-only, but respect existing modes bindings[host_path] = ( env_path, - "rw" if copy or isinstance(fld, shell.outarg) else "ro", + "rw" if copy_file or isinstance(fld, shell.outarg) else "ro", ) # Provide updated in-container paths to the command to be run. 
If a diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 736a6e2729..7af9859974 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -159,7 +159,7 @@ def template_update_single( values: dict[str, ty.Any] = None, output_dir: Path | None = None, spec_type: str = "input", -) -> Path | None: +) -> Path | list[Path | None] | None: """Update a single template from the input_spec or output_spec based on the value from inputs_dict (checking the types of the fields, that have "output_file_template)" @@ -196,18 +196,19 @@ def template_update_single( return None # inputs_dict[field.name] is True or spec_type is output value = _template_formatting(field, definition, values) - # changing path so it is in the output_dir if output_dir and value is not None: + # changing path so it is in the output_dir # should be converted to str, it is also used for input fields that should be str if type(value) is list: - return [output_dir / val.name for val in value] + value = [output_dir / val.name for val in value] else: - return output_dir / value.name - else: - return None + value = output_dir / value.name + return value -def _template_formatting(field, definition, values): +def _template_formatting( + field: "shell.arg", definition: "ShellDef", values: dict[str, ty.Any] +) -> Path | list[Path] | None: """Formatting the field template based on the values from inputs. Taking into account that the field with a template can be a MultiOutputFile and the field values needed in the template can be a list - @@ -226,7 +227,7 @@ def _template_formatting(field, definition, values): Returns ------- - formatted : str or list + formatted : Path or list[Path | None] or None formatted template """ # if a template is a function it has to be run first with the inputs as the only arg @@ -237,6 +238,8 @@ def _template_formatting(field, definition, values): # as default, we assume that keep_extension is True if isinstance(template, (tuple, list)): formatted = [_single_template_formatting(field, t, values) for t in template] + if any([val is None for val in formatted]): + return None else: assert isinstance(template, str) formatted = _single_template_formatting(field, template, values) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 7d48870d76..e470d04ab1 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -898,12 +898,27 @@ def _from_task(cls, task: "Task[ShellDef]") -> Self: setattr(outputs, fld.name, None) else: raise ValueError( - f"file system path(s) provided to mandatory field {fld.name!r}," - f"{resolved_value}, does not exist, this is likely due to an " + f"file system path(s) provided to mandatory field {fld.name!r}, " + f"'{resolved_value}', does not exist, this is likely due to an " f"error in the {task.name!r} task" ) return outputs + # @classmethod + # def _from_defaults(cls) -> Self: + # """Create an output object from the default values of the fields""" + # defaults = {} + # for field in attrs_fields(cls): + # if isinstance(field.default, attrs.Factory): + # defaults[field.name] = field.default.factory() + # elif TypeParser.contains_type(FileSet, field.type): + # # Will be set by the templating code + # defaults[field.name] = attrs.NOTHING + # else: + # defaults[field.name] = field.default + + # return cls(**defaults) + @classmethod def _resolve_default_value(cls, fld: shell.out, output_dir: Path) -> ty.Any: """Resolve path and glob expr default values relative to the output dir""" @@ -991,20 +1006,24 @@ def _resolve_value( 
call_args_val[argnm] = fld elif argnm == "output_dir": call_args_val[argnm] = task.output_dir + elif argnm == "executable": + call_args_val[argnm] = task.definition.executable elif argnm == "inputs": call_args_val[argnm] = task.inputs elif argnm == "stdout": call_args_val[argnm] = task.return_values["stdout"] elif argnm == "stderr": call_args_val[argnm] = task.return_values["stderr"] + elif argnm == "self": + pass # If the callable is a class else: try: call_args_val[argnm] = task.inputs[argnm] except KeyError as e: e.add_note( - f"arguments of the callable function from {fld.name} " + f"arguments of the callable function from {fld.name!r} " f"has to be in inputs or be field or output_dir, " - f"but {argnm} is used" + f"but {argnm!r} is used" ) raise return callable_(**call_args_val) @@ -1040,7 +1059,7 @@ def cmdline(self) -> str: the current working directory.""" # Skip the executable, which can be a multi-part command, e.g. 'docker run'. values = attrs_values(self) - values.update(template_update(self)) + values.update(template_update(self, output_dir=Path.cwd())) cmd_args = self._command_args(values=values) cmdline = cmd_args[0] for arg in cmd_args[1:]: @@ -1221,22 +1240,6 @@ def _format_arg(self, field: shell.arg, values: dict[str, ty.Any]) -> list[str]: cmd_el_str = "" return split_cmd(cmd_el_str) - def _generated_output_names(self, stdout: str, stderr: str): - """Returns a list of all outputs that will be generated by the task. - Takes into account the task input and the requires list for the output fields. - TODO: should be in all Output specs? - """ - # checking the input (if all mandatory fields are provided, etc.) - self._check_rules() - output_names = ["return_code", "stdout", "stderr"] - for fld in list_fields(self): - # assuming that field should have either default or metadata, but not both - if is_set(fld.default): - output_names.append(fld.name) - elif is_set(self.Outputs._resolve_output_value(fld, stdout, stderr)): - output_names.append(fld.name) - return output_names - def _rule_violations(self) -> list[str]: errors = super()._rule_violations() diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index a07b7a3e63..1aced6ed14 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -51,7 +51,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert Path(outputs.stdout).parent == tmp_path assert outputs.return_code == 0 assert outputs.stderr == "" @@ -65,7 +65,7 @@ def test_shell_cmd_2(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd[1:]) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -82,7 +82,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd_args) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -99,7 +99,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): 
assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "pydra\n" assert outputs.return_code == 0 assert outputs.stderr == "" @@ -142,7 +142,7 @@ def test_shell_cmd_4(plugin, tmp_path): assert shelly.executable == "echo" assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(plugin=plugin) + outputs = shelly(plugin="debug") assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -167,7 +167,7 @@ def test_shell_cmd_5(plugin, tmp_path): assert shelly.executable == "echo" assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(plugin=plugin) + outputs = shelly(plugin="debug") assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -192,7 +192,7 @@ def test_shell_cmd_6(plugin, tmp_path): # "echo -n nipype", # "echo -n pydra", # ] - outputs = shelly(cache_dir=tmp_path, plugin=plugin) + outputs = shelly(cache_dir=tmp_path, plugin="debug") assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -235,7 +235,7 @@ def test_shell_cmd_7(plugin, tmp_path): assert shelly.executable == ["echo", ["echo", "-n"]] assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - outputs = shelly(plugin=plugin) + outputs = shelly(plugin="debug") assert outputs.stdout == [["nipype\n", "pydra\n"], ["nipype", "pydra"]] @@ -254,7 +254,7 @@ def Workflow(cmd1, cmd2): wf = Workflow(cmd1="pwd", cmd2="ls") - with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) assert "_result.pklz" in res.outputs.out @@ -292,7 +292,7 @@ class Outputs(ShellOutputs): assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "hello from pydra" @@ -329,7 +329,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "HELLO from pydra" @@ -355,7 +355,7 @@ class Outputs(ShellOutputs): shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -379,7 +379,7 @@ class Outputs(ShellOutputs): shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -407,7 +407,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", 
cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -456,7 +456,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "\n" @@ -484,7 +484,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -509,7 +509,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -537,7 +537,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "Hi\n" @@ -594,7 +594,7 @@ class Outputs(ShellOutputs): shelly = Shelly(opt_t=cmd_t) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, plugin="debug", cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -656,7 +656,7 @@ class Outputs(ShellOutputs): shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, plugin="debug", cache_dir=tmp_path) def test_shell_cmd_inputspec_6a_exception(plugin, tmp_path): @@ -725,7 +725,7 @@ class Outputs(ShellOutputs): shelly.opt_l = cmd_l assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, plugin="debug", cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -751,7 +751,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, arg=arg) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" out1 = outputs.out1.fspath assert out1.exists() @@ -785,7 +785,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, newfile=File.mock("newfile_tmp.txt")) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -813,7 +813,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, arg=arg) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" # checking if the file is created in a good place assert outputs.out1.fspath.parent.parent == tmp_path @@ -854,7 +854,7 @@ class Outputs(ShellOutputs): time="02121010", ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert 
outputs.out1.fspath.exists() @@ -893,7 +893,7 @@ class Outputs(ShellOutputs): time="02121010", ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -931,7 +931,7 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" @@ -969,7 +969,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" @@ -1009,7 +1009,7 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy" @@ -1045,7 +1045,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file" @@ -1086,7 +1086,7 @@ class Outputs(ShellOutputs): file_copy="my_file_copy.txt", ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "my_file_copy.txt" @@ -1126,7 +1126,7 @@ class Outputs(ShellOutputs): ) assert shelly.executable == cmd_exec - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "hello from boston" @@ -1242,7 +1242,7 @@ class Outputs(ShellOutputs): number=2, ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" fspath = outputs.file_copy.fspath assert fspath.exists() @@ -1302,7 +1302,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place @@ -1344,7 +1344,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying @@ -1392,7 +1392,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline 
== "echo HELLO" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1461,7 +1461,7 @@ class Outputs(ShellOutputs): shelly = Shelly().split(text=["HELLO", "hi"]) assert shelly.executable == cmd_exec - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1472,13 +1472,13 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): adding splitter to input that is used in the output_file_tamplate """ cmd = "touch" - args = [File.mock("newfile_1.txt"), File.mock("newfile_2.txt")] + args = ["newfile_1.txt", "newfile_2.txt"] @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd - arg: File = shell.arg(argstr=None) + arg: str = shell.arg(argstr=None) class Outputs(ShellOutputs): out1: File = shell.outarg( @@ -1488,7 +1488,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd).split(arg=args) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() @@ -1525,7 +1525,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == ["hello from pydra", "have a nice one"] @@ -1565,7 +1565,7 @@ class Outputs(ShellOutputs): ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) for i in range(len(files)): assert outputs.stdout[i] == "" assert outputs.out_file[i].fspath.exists() @@ -1648,7 +1648,7 @@ def Workflow(cmd, args): wf = Workflow(cmd="touch", args=(File.mock("newfile.txt"),)) - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="debug") as sub: res = sub(wf) assert res.outputs.out == "" @@ -1707,7 +1707,7 @@ def Workflow(cmd1, cmd2, args): wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="debug") as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1768,7 +1768,7 @@ def Workflow(cmd1, cmd2, args): wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="debug") as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1829,7 +1829,7 @@ def Workflow(cmd1, cmd2, args): args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")] ) - with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) for i in range(2): @@ -1894,7 +1894,7 @@ def Workflow(cmd1, cmd2, args): args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")], ) - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="debug") as sub: res = sub(wf) assert res.outputs.out1 == ["", ""] @@ -1920,7 +1920,7 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): ) shelly = Shelly() - outputs = 
results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -1942,7 +1942,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -1964,7 +1964,7 @@ class Outputs(ShellOutputs): shelly = Shelly() with pytest.raises(Exception) as exinfo: - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="debug") as sub: shelly(submitter=sub) assert "does not exist" in str(exinfo.value) @@ -1987,7 +1987,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -2010,8 +2010,7 @@ class Outputs(ShellOutputs): shelly = Shelly() with pytest.raises(FileNotFoundError): - with Submitter(cache_dir=tmp_path, plugin=plugin) as sub: - sub(shelly) + shelly(cache_dir=tmp_path, worker="debug") @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2032,7 +2031,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2058,20 +2057,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: MultiOutputFile = shell.outarg(callable=gather_output) + newfile: MultiOutputFile = shell.out(callable=gather_output) shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 assert all([file.fspath.exists() for file in outputs.newfile]) - assert ( - get_output_names(shelly) - == shelly._generated_output_names(outputs.stdout, outputs.stderr) - == ["newfile", "return_code", "stderr", "stdout"] - ) + assert get_output_names(shelly) == ["newfile", "return_code", "stderr", "stdout"] @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2094,11 +2089,11 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile: MultiOutputFile = shell.arg(callable=gather_output) + newfile: MultiOutputFile = shell.out(callable=gather_output) shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2149,11 +2144,11 @@ def gather_output(executable, output_dir): files = executable[1:] return [Path(output_dir) / file for file in files] - newfile: MultiOutputFile = shell.arg(callable=gather_output) + newfile: MultiOutputFile = shell.out(callable=gather_output) shelly = Shelly() - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2167,26 +2162,28 @@ def test_shell_cmd_outputspec_6(plugin, results_function, 
tmp_path): (similar to the previous example, but not touching input_spec) """ cmd = "touch" - args = "newfile_tmp.txt" + arg = "newfile_tmp.txt" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: str = shell.arg() + class Outputs(ShellOutputs): - out1: File = shell.ouarg( - path_template="{args}", + out1: File = shell.outarg( + path_template="{arg}", help="output file", ) shelly = Shelly( executable=cmd, - additional_args=args, + arg=arg, ) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -2248,6 +2245,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): new_files: MultiOutputFile = shell.outarg( + argstr=None, path_template="file{files_id}.txt", help="output file", ) @@ -2257,7 +2255,7 @@ class Outputs(ShellOutputs): files_id=new_files_id, ) - outputs = results_function(shelly, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) assert outputs.stdout == "" for file in outputs.new_files: assert file.fspath.exists() @@ -2293,6 +2291,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): new_files: MultiOutputFile = shell.outarg( + argstr=None, path_template="file{files_id}.txt", help="output file", ) @@ -2317,7 +2316,7 @@ def test_shell_cmd_outputspec_8a(tmp_path, plugin, results_function): requiring two callables with parameters stdout and stderr """ cmd = "echo" - args = ["newfile_1.txt", "newfile_2.txt"] + args = [["newfile_1.txt"], ["newfile_2.txt"]] def get_file_index(stdout): stdout = re.sub(r".*_", "", stdout) @@ -2335,25 +2334,21 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - out1: File = shell.outarg( - path_template="{args}", - help="output file", - ) - out_file_index: int = shell.arg( + out_file_index: int = shell.out( help="output file", callable=get_file_index, ) - stderr_field: str = shell.arg( + stderr_field: str = shell.out( help="The standard error output", callable=get_stderr, ) - shelly = Shelly().split("additional_args", args=args) + shelly = Shelly().split(additional_args=args) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) for index in range(2): assert outputs.out_file_index[index] == index + 1 - assert outputs.stderr_field[index] == f"stderr: {outputs.stderr}" + assert outputs.stderr_field[index] == f"stderr: {outputs.stderr[index]}" def test_shell_cmd_outputspec_8b_error(tmp_path): @@ -2396,16 +2391,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: str = shell.arg() + class Outputs(ShellOutputs): resultsDir: Directory = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - shelly = Shelly(resultsDir="outdir").split(additional_args=args) + shelly = Shelly(resultsDir="outdir").split(arg=args) - results_function(shelly, plugin=plugin, cache_dir=tmp_path) + results_function(shelly, plugin="debug", cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @@ -2440,13 +2437,8 @@ class Outputs(ShellOutputs): ) shelly = Shelly(resultsDir="test") - assert ( - get_output_names(shelly) - == shelly._generated_output_names - == ["resultsDir", "return_code", "stderr", "stdout"] - ) - outputs = results_function(shelly, 
plugin=plugin, cache_dir=tmp_path) - print("Cache_dirr:", shelly.cache_dir) + assert get_output_names(shelly) == ["resultsDir", "return_code", "stderr", "stdout"] + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) output_dir = next(tmp_path.iterdir()) assert (output_dir / Path("test")).exists() assert get_lowest_directory(outputs.resultsDir) == get_lowest_directory( @@ -2468,18 +2460,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: str = shell.arg() + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - shelly = Shelly( - executable=cmd, - ).split("args", args=args) + shelly = Shelly(executable=cmd).split(arg=args) - outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) + outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() @@ -2499,19 +2491,19 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - executable = cmd + executable = "placeholder" class Outputs(ShellOutputs): newfile: File = shell.outarg(path_template="newfile_tmp.txt") @workflow.define(outputs=["stdout", "newfile"]) def Workflow(cmd): - shelly = workflow.add(Shelly()) + shelly = workflow.add(Shelly(executable=cmd)) return shelly.stdout, shelly.newfile - wf = Workflow() + wf = Workflow(cmd=cmd) - with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: + with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.stdout == "" @@ -2605,12 +2597,13 @@ class Outputs(ShellOutputs): shelly = Shelly( file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") ) - # all fields from output_spec should be in output_names and _generated_output_names - assert ( - get_output_names(shelly) - == shelly._generated_output_names - == ["newfile1", "newfile2", "return_code", "stderr", "stdout"] - ) + assert get_output_names(shelly) == [ + "newfile1", + "newfile2", + "return_code", + "stderr", + "stdout", + ] outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" @@ -2647,7 +2640,6 @@ class Outputs(ShellOutputs): executable=cmd, ) shelly.file1 = File.mock("new_file_1.txt") - # _generated_output_names should know that newfile2 will not be generated assert get_output_names(shelly) == [ "newfile1", "newfile2", @@ -2657,7 +2649,7 @@ class Outputs(ShellOutputs): ] outputs = shelly(cache_dir=tmp_path) - assert shelly._generated_output_names(outputs.stdout, outputs.stderr) == [ + assert get_output_names(shelly) == [ "newfile1", "return_code", "stderr", @@ -2734,7 +2726,6 @@ class Outputs(ShellOutputs): ) shelly.file1 = File.mock("new_file_1.txt") shelly.file2 = File.mock("new_file_2.txt") - # _generated_output_names should know that newfile2 will not be generated assert get_output_names(shelly) == [ "newfile1", "newfile2", @@ -2744,9 +2735,7 @@ class Outputs(ShellOutputs): ] outputs = shelly(cache_dir=tmp_path) - assert shelly._generated_output_names( - stdout=outputs.stdout, stderr=outputs.stderr - ) == [ + assert get_output_names(shelly) == [ "newfile1", "return_code", "stderr", @@ -2785,14 +2774,14 @@ class Outputs(ShellOutputs): ) shelly.file1 = File.mock("new_file_1.txt") shelly.additional_inp = 2 - # _generated_output_names should be the same as output_names outputs = shelly(cache_dir=tmp_path) - assert ( - get_output_names(shelly) - == shelly._generated_output_names(outputs.stdout, 
outputs.stderr) - == ["newfile1", "return_code", "stderr", "stdout"] - ) + assert get_output_names(shelly)(outputs.stdout, outputs.stderr) == [ + "newfile1", + "return_code", + "stderr", + "stdout", + ] assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index 4bdae926a7..2f3b973bb3 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -64,6 +64,8 @@ def run_submitter( """ with Submitter(worker=plugin, cache_dir=cache_dir, environment=environment) as sub: results = sub(shell_def) + if results.errored: + raise RuntimeError(f"task {shell_def} failed:\n" + "\n".join(results.errors)) return results.outputs From 183782b7071b7d7070ee3f323e6eb717a9b38205 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Mar 2025 11:50:46 +1100 Subject: [PATCH 328/342] added pympler to test dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 868a488d58..98b65a7c94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ test = [ "pytest-xdist <2.0", "pytest-rerunfailures", "pytest-timeout", + "pympler", "codecov", "fileformats-extras >=0.15a4", "numpy", From 022c02a16691078dd98006a9df9a85f767818733 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Mar 2025 15:34:39 +1100 Subject: [PATCH 329/342] fixed issue in doc-string passing --- pydra/design/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index d75dc17850..616a656f4c 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -947,10 +947,10 @@ def parse_doc_string(doc_str: str) -> tuple[dict[str, str], dict[str, str] | lis for return_val, return_help in re.findall(r":return (\w+): (.*)", doc_str): output_helps[return_val] = return_help google_args_match = re.match( - r".*\n\s+Args:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + r".*\n\s*Args:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE ) google_returns_match = re.match( - r".*\n\s+Returns:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + r".*\n\s*Returns:\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE ) if google_args_match: args_str = google_args_match.group(1) @@ -967,12 +967,12 @@ def parse_doc_string(doc_str: str) -> tuple[dict[str, str], dict[str, str] | lis return_help = white_space_re.sub(" ", return_help).strip() output_helps[return_name] = return_help numpy_args_match = re.match( - r".*\n\s+Parameters\n\s*---------- *\n(.*)", + r".*\n\s+Parameters\n\s*----------\s*\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE, ) numpy_returns_match = re.match( - r".*\n\s+Returns\n\s+------- *\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE + r".*\n\s+Returns\n\s*-------\s*\n(.*)", doc_str, flags=re.DOTALL | re.MULTILINE ) if numpy_args_match: args_str = numpy_args_match.group(1) From ee8800069207844ff1384e6a5fb7b99ba84b82ee Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Mar 2025 15:35:20 +1100 Subject: [PATCH 330/342] changed additional_args to be MultiInputObj[str] from list[str] type --- new_file_2.txt | 0 newfile_tmp.txt | 0 pydra/engine/helpers_state.py | 2 +- pydra/engine/specs.py | 7 ++--- pydra/engine/tests/test_shelltask.py | 46 ++++++++++++++++++---------- 5 files changed, 33 insertions(+), 22 deletions(-) create mode 100644 new_file_2.txt create mode 100644 newfile_tmp.txt diff --git a/new_file_2.txt b/new_file_2.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/newfile_tmp.txt b/newfile_tmp.txt 
new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pydra/engine/helpers_state.py b/pydra/engine/helpers_state.py index 94022e5724..091aa81d6b 100644 --- a/pydra/engine/helpers_state.py +++ b/pydra/engine/helpers_state.py @@ -629,7 +629,7 @@ def inputs_types_to_dict(name, inputs): def unwrap_splitter( - splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...]] + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...]], ) -> ty.Iterable[str]: """Unwraps a splitter into a flat list of fields that are split over, i.e. [("a", "b"), "c"] -> ["a", "b", "c"] diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index e470d04ab1..0ef4775fdd 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -18,7 +18,7 @@ import cloudpickle as cp from fileformats.generic import FileSet from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils.typing import is_optional, optional_type +from pydra.utils.typing import is_optional, optional_type, MultiInputObj from .helpers import ( attrs_fields, attrs_values, @@ -1039,11 +1039,10 @@ class ShellDef(TaskDef[ShellOutputsType]): BASE_NAMES = ["additional_args"] - additional_args: list[str] = shell.arg( + additional_args: MultiInputObj[str] = shell.arg( name="additional_args", default=attrs.Factory(list), - type=list[str], - sep=" ", + type=MultiInputObj[str], help="Additional free-form arguments to append to the end of the command.", ) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 1aced6ed14..71c1c65d3f 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1,4 +1,3 @@ -from glob import glob import typing as ty import os import sys @@ -7,7 +6,7 @@ import re import stat from pydra.engine.submitter import Submitter -from pydra.design import shell, workflow +from pydra.design import shell, workflow, python from pydra.engine.specs import ( ShellOutputs, ShellDef, @@ -248,8 +247,14 @@ def test_wf_shell_cmd_1(plugin, tmp_path): @workflow.define def Workflow(cmd1, cmd2): - shelly_pwd = workflow.add(shell.define(cmd1)) - shelly_ls = workflow.add(shell.define(cmd2, additional_args=shelly_pwd.stdout)) + shelly_pwd = workflow.add(shell.define(cmd1)()) + + @python.define + def StripAndListify(x: str) -> list[str]: + return [x.strip()] + + listify = workflow.add(StripAndListify(x=shelly_pwd.stdout)) + shelly_ls = workflow.add(shell.define(cmd2)(additional_args=listify.out)) return shelly_ls.stdout wf = Workflow(cmd1="pwd", cmd2="ls") @@ -1591,30 +1596,32 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = "touch" + arg: str = shell.arg() + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - @workflow.define - def Workflow(cmd, args): + @workflow.define(outputs=["out_f", "stdout"]) + def Workflow(cmd, arg): shelly = workflow.add( Shelly( executable=cmd, - additional_args=args, + arg=arg, ) ) return shelly.out1, shelly.stdout - wf = Workflow(cmd="touch", args=File.mock("newfile.txt")) + wf = Workflow(cmd="touch", arg="newfile.txt") - with Submitter(plugin=plugin_dask_opt) as sub: + with Submitter(plugin=plugin_dask_opt, cache_dir=tmp_path) as sub: res = sub(wf) - assert res.outputs.out == "" + assert res.outputs.stdout == "" assert res.outputs.out_f.fspath.exists() assert res.outputs.out_f.fspath.parent.parent == tmp_path @@ -1628,25 +1635,27 @@ def test_wf_shell_cmd_2a(plugin, tmp_path): class 
Shelly(ShellDef["Shelly.Outputs"]): executable = "shelly" + arg: str = shell.arg() + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - @workflow.define - def Workflow(cmd, args): + @workflow.define(outputs=["out_f", "out"]) + def Workflow(cmd, arg): shelly = workflow.add( Shelly( executable=cmd, - additional_args=args, + arg=arg, ) ) return shelly.out1, shelly.stdout - wf = Workflow(cmd="touch", args=(File.mock("newfile.txt"),)) + wf = Workflow(cmd="touch", arg="newfile.txt") with Submitter(plugin="debug") as sub: res = sub(wf) @@ -1673,6 +1682,9 @@ class Outputs(ShellOutputs): @shell.define class Shelly2(ShellDef["Shelly2.Outputs"]): + + executable = "shelly2" + orig_file: File = shell.arg( position=1, help="output file", @@ -2491,7 +2503,7 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): @shell.define class Shelly(ShellDef["Shelly.Outputs"]): - executable = "placeholder" + executable = "shelly" class Outputs(ShellOutputs): newfile: File = shell.outarg(path_template="newfile_tmp.txt") From f62fd0ddf948d9608ca161abe2406bb82b1538d4 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Mon, 10 Mar 2025 21:04:49 +1100 Subject: [PATCH 331/342] debugging test_shelltask, reworking resolve of lazy inputs --- pydra/engine/helpers.py | 27 ++++++++++++++++++ pydra/engine/specs.py | 21 +++++++++++--- pydra/engine/state.py | 8 ++++-- pydra/engine/submitter.py | 30 ++++++++++++++++---- pydra/engine/tests/test_shelltask.py | 41 ++++++++++++++++++---------- 5 files changed, 101 insertions(+), 26 deletions(-) diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 088bfc6e4c..7a6701e4f7 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -16,11 +16,14 @@ from filelock import SoftFileLock, Timeout import cloudpickle as cp from fileformats.core import FileSet +from pydra.utils.typing import StateArray + if ty.TYPE_CHECKING: from .specs import TaskDef, Result, WorkflowOutputs, WorkflowDef from .core import Task from pydra.design.base import Field + from pydra.engine.lazy import LazyField PYDRA_ATTR_METADATA = "__PYDRA_METADATA__" @@ -695,3 +698,27 @@ def is_lazy(obj): from pydra.engine.lazy import LazyField return isinstance(obj, LazyField) + + +T = ty.TypeVar("T") +U = ty.TypeVar("U") + + +def state_array_support( + function: ty.Callable[T, U], +) -> ty.Callable[T | StateArray[T], U | StateArray[U]]: + """ + Decorator to convert a allow a function to accept and return StateArray objects, + where the function is applied to each element of the StateArray. 
+ """ + + def state_array_wrapper( + value: "T | StateArray[T] | LazyField[T]", + ) -> "U | StateArray[U] | LazyField[U]": + if is_lazy(value): + return value + if isinstance(value, StateArray): + return StateArray(function(v) for v in value) + return function(value) + + return state_array_wrapper diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 0ef4775fdd..401061e317 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -16,9 +16,9 @@ import attrs from attrs.converters import default_if_none import cloudpickle as cp -from fileformats.generic import FileSet +from fileformats.generic import FileSet, File from pydra.utils.messenger import AuditFlag, Messenger -from pydra.utils.typing import is_optional, optional_type, MultiInputObj +from pydra.utils.typing import is_optional, optional_type from .helpers import ( attrs_fields, attrs_values, @@ -28,6 +28,7 @@ ensure_list, parse_format_string, fields_in_formatter, + state_array_support, ) from .helpers_file import template_update, template_update_single from . import helpers_state as hlpst @@ -1032,6 +1033,16 @@ def _resolve_value( ShellOutputsType = ty.TypeVar("OutputType", bound=ShellOutputs) +@state_array_support +def additional_args_converter(value: ty.Any) -> list[str]: + """Convert additional arguments to a list of strings.""" + if isinstance(value, str): + return shlex.split(value) + if not isinstance(value, ty.Sequence): + return [value] + return list(value) + + @attrs.define(kw_only=True, auto_attribs=False, eq=False) class ShellDef(TaskDef[ShellOutputsType]): @@ -1039,10 +1050,12 @@ class ShellDef(TaskDef[ShellOutputsType]): BASE_NAMES = ["additional_args"] - additional_args: MultiInputObj[str] = shell.arg( + additional_args: list[str | File] = shell.arg( name="additional_args", default=attrs.Factory(list), - type=MultiInputObj[str], + converter=additional_args_converter, + type=list[str | File], + sep=" ", help="Additional free-form arguments to append to the end of the command.", ) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 7b9a2cd30e..d7c93665e4 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -27,14 +27,18 @@ class StateIndex: indices: OrderedDict[str, int] - def __init__(self, indices: dict[str, int] | None = None): + def __init__( + self, indices: dict[str, int] | ty.Sequence[tuple[str, int]] | None = None + ): # We used ordered dict here to ensure the keys are always in the same order # while OrderedDict is not strictly necessary for CPython 3.7+, we use it to # signal that the order of the keys is important if indices is None: self.indices = OrderedDict() else: - self.indices = OrderedDict(sorted(indices.items())) + if isinstance(indices, dict): + indices = indices.items() + self.indices = OrderedDict(sorted(indices)) def __len__(self) -> int: return len(self.indices) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index d7729d09ed..102afa2df0 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -20,7 +20,7 @@ from pydra.utils.hash import PersistentCache from .state import StateIndex from pydra.utils.typing import StateArray -from pydra.engine.lazy import LazyField +from pydra.engine.lazy import LazyField, LazyOutField from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger @@ -537,6 +537,10 @@ def __init__( self.queued = {} self.running = {} # Not used in logic, but may be useful for progress tracking self.unrunnable = defaultdict(list) + # Prepare the state to be run + if 
self.state: + self.state.prepare_states(self.node.state_values) + self.state.prepare_inputs() self.state_names = self.node.state.names if self.node.state else [] self.workflow = workflow self.graph = None @@ -567,6 +571,21 @@ def tasks(self) -> ty.Iterable["Task[DefType]"]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() + def translate_index(self, index: StateIndex, lf: LazyOutField): + state_key = f"{lf._node.name}.{lf._field}" + try: + upstream_state = self.state.inner_inputs[state_key] + except KeyError: + state_index = StateIndex(index) + else: + state_index = StateIndex( + zip( + upstream_state.keys_final, + upstream_state.ind_l_final[index[state_key]], + ) + ) + return state_index + def matching_jobs(self, index: StateIndex = StateIndex()) -> "StateArray[Task]": """Get the jobs that match a given state index. @@ -702,23 +721,22 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: if not self.node.state: return {None: self.node._definition} split_defs = {} - self.state.prepare_states(self.node.state_values) - self.state.prepare_inputs() for input_ind in self.node.state.inputs_ind: resolved = {} for inpt_name in set(self.node.input_names): value = getattr(self._definition, inpt_name) + state_key = f"{self.node.name}.{inpt_name}" if isinstance(value, LazyField): value = resolved[inpt_name] = value._get_value( workflow=self.workflow, graph=self.graph, - state_index=StateIndex(input_ind), + state_index=input_ind, ) - if f"{self.node.name}.{inpt_name}" in input_ind: + elif state_key in input_ind: resolved[inpt_name] = self.node.state._get_element( value=value, field_name=inpt_name, - ind=input_ind[f"{self.node.name}.{inpt_name}"], + ind=input_ind[state_key], ) split_defs[StateIndex(input_ind)] = attrs.evolve( self.node._definition, **resolved diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 71c1c65d3f..9f8afa7427 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1674,9 +1674,11 @@ def test_wf_shell_cmd_3(plugin, tmp_path): class Shelly1(ShellDef["Shelly1.Outputs"]): executable = "shelly" + arg: str = shell.arg(argstr=None) + class Outputs(ShellOutputs): file: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) @@ -1700,12 +1702,12 @@ class Outputs(ShellOutputs): ) @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) - def Workflow(cmd1, cmd2, args): + def Workflow(cmd1, cmd2, arg): shelly1 = workflow.add( Shelly1( executable=cmd1, - additional_args=args, + arg=arg, ) ) shelly2 = workflow.add( @@ -1717,9 +1719,9 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) + wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") - with Submitter(plugin="debug") as sub: + with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1738,14 +1740,19 @@ def test_wf_shell_cmd_3a(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + arg: str = shell.outarg(argstr=None) + class Outputs(ShellOutputs): + file: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) @shell.define class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" orig_file: str = shell.arg( position=1, help="output file", @@ -1761,12 +1768,12 @@ class 
Outputs(ShellOutputs): ) @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) - def Workflow(cmd1, cmd2, args): + def Workflow(cmd1, cmd2, arg): shelly1 = workflow.add( Shelly1( executable=cmd1, - additional_args=args, + arg=arg, ) ) shelly2 = workflow.add( @@ -1778,7 +1785,7 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp", args=File.mock("newfile.txt")) + wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") with Submitter(plugin="debug") as sub: res = sub(wf) @@ -1861,14 +1868,20 @@ def test_wf_shell_cmd_ndst_1(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly" + + arg: str = shell.arg(argstr=None) + class Outputs(ShellOutputs): file: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) @shell.define class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" + orig_file: str = shell.arg( position=1, help="output file", @@ -1889,7 +1902,7 @@ def Workflow(cmd1, cmd2, args): shelly1 = workflow.add( Shelly1( executable=cmd1, - ).split("args", args=args) + ).split("arg", arg=args) ) shelly2 = workflow.add( Shelly2( @@ -1903,10 +1916,10 @@ def Workflow(cmd1, cmd2, args): wf = Workflow( cmd1="touch", cmd2="cp", - args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")], + args=["newfile_1.txt", "newfile_2.txt"], ) - with Submitter(plugin="debug") as sub: + with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.out1 == ["", ""] @@ -3288,7 +3301,7 @@ class Outputs(ShellOutputs): # An exception should be raised because the second mandatory output does not exist with pytest.raises( ValueError, - match=r"file system path provided to mandatory field .* does not exist", + match=r"file system path\(s\) provided to mandatory field .* does not exist", ): shelly(cache_dir=tmp_path) # checking if the first output was created From da9215086470aa19a1e56685a0705c419ba0d10a Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Mar 2025 12:27:25 +1100 Subject: [PATCH 332/342] dropped StateIndex object in favour of simple int index --- pydra/engine/core.py | 7 +-- pydra/engine/lazy.py | 17 ++---- pydra/engine/state.py | 119 ++------------------------------------ pydra/engine/submitter.py | 82 +++++++++++--------------- 4 files changed, 47 insertions(+), 178 deletions(-) diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 6b0d50bbda..89bf5af575 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -89,7 +89,7 @@ class Task(ty.Generic[DefType]): definition: DefType submitter: "Submitter | None" environment: "Environment | None" - state_index: state.StateIndex + state_index: int bindings: dict[str, ty.Any] | None = None # Bindings for the task environment _inputs: dict[str, ty.Any] | None = None @@ -100,7 +100,7 @@ def __init__( submitter: "Submitter", name: str, environment: "Environment | None" = None, - state_index: "state.StateIndex | None" = None, + state_index: int | None = None, hooks: TaskHooks | None = None, ): """ @@ -120,9 +120,6 @@ def __init__( 4. 
Two or more concurrent new processes get to start """ - if state_index is None: - state_index = state.StateIndex() - if not isinstance(definition, TaskDef): raise ValueError( f"Task definition ({definition!r}) must be a TaskDef, not {type(definition)}" diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 0739a67cd2..45c528dd77 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -9,7 +9,6 @@ from .submitter import DiGraph, NodeExecution from .core import Task, Workflow from .specs import TaskDef - from .state import StateIndex T = ty.TypeVar("T") @@ -49,7 +48,7 @@ def _get_value( self, workflow: "Workflow", graph: "DiGraph[NodeExecution]", - state_index: "StateIndex | None" = None, + state_index: int | None = None, ) -> ty.Any: """Return the value of a lazy field. @@ -59,7 +58,7 @@ def _get_value( the workflow object graph: DiGraph[NodeExecution] the graph representing the execution state of the workflow - state_index : StateIndex, optional + state_index : int, optional the state index of the field to access Returns @@ -95,7 +94,7 @@ def _get_value( self, workflow: "Workflow", graph: "DiGraph[NodeExecution]", - state_index: "StateIndex | None" = None, + state_index: int | None = None, ) -> ty.Any: """Return the value of a lazy field. @@ -105,7 +104,7 @@ def _get_value( the workflow object graph: DiGraph[NodeExecution] the graph representing the execution state of the workflow - state_index : StateIndex, optional + state_index : int, optional the state index of the field to access Returns @@ -134,7 +133,7 @@ def _get_value( self, workflow: "Workflow", graph: "DiGraph[NodeExecution]", - state_index: "StateIndex | None" = None, + state_index: int | None = None, ) -> ty.Any: """Return the value of a lazy field. @@ -144,7 +143,7 @@ def _get_value( the workflow object graph: DiGraph[NodeExecution] the graph representing the execution state of the workflow - state_index : StateIndex, optional + state_index : int, optional the state index of the field to access Returns @@ -152,10 +151,6 @@ def _get_value( value : Any the resolved value of the lazy-field """ - from pydra.engine.state import StateIndex - - if state_index is None: - state_index = StateIndex() jobs = graph.node(self._node.name).matching_jobs(state_index) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index d7c93665e4..20b1d0e7b3 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -2,7 +2,6 @@ from copy import deepcopy import itertools -from collections import OrderedDict from functools import reduce import typing as ty from . 
import helpers_state as hlpst @@ -15,117 +14,6 @@ OutputsType = ty.TypeVar("OutputsType") -class StateIndex: - """The collection of state indices that identifies a single element within the list - of tasks generated from a node - - Parameters - ---------- - indices : dict[str, int] - a dictionary of indices for each input field - """ - - indices: OrderedDict[str, int] - - def __init__( - self, indices: dict[str, int] | ty.Sequence[tuple[str, int]] | None = None - ): - # We used ordered dict here to ensure the keys are always in the same order - # while OrderedDict is not strictly necessary for CPython 3.7+, we use it to - # signal that the order of the keys is important - if indices is None: - self.indices = OrderedDict() - else: - if isinstance(indices, dict): - indices = indices.items() - self.indices = OrderedDict(sorted(indices)) - - def __len__(self) -> int: - return len(self.indices) - - def __iter__(self) -> ty.Generator[str, None, None]: - return iter(self.indices) - - def __getitem__(self, key: str) -> int: - return self.indices[key] - - def __lt__(self, other: "StateIndex") -> bool: - if list(self.indices) != list(other.indices): - raise ValueError( - f"StateIndex {self} does not contain the same indices in the same order " - f"as {other}: {list(self.indices)} != {list(other.indices)}" - ) - return tuple(self.indices.items()) < tuple(other.indices.items()) - - def __repr__(self) -> str: - return ( - "StateIndex(" + ", ".join(f"{n}={v}" for n, v in self.indices.items()) + ")" - ) - - def __hash__(self): - return hash(tuple(self.indices.items())) - - def __eq__(self, other) -> bool: - return self.indices == other.indices - - def __str__(self) -> str: - return "__".join(f"{n}-{i}" for n, i in self.indices.items()) - - def __bool__(self) -> bool: - return bool(self.indices) - - def subset(self, state_names: ty.Iterable[str]) -> ty.Self: - """Create a new StateIndex with only the specified fields - - Parameters - ---------- - fields : list[str] - the fields to keep in the new StateIndex - - Returns - ------- - StateIndex - a new StateIndex with only the specified fields - """ - return type(self)({k: v for k, v in self.indices.items() if k in state_names}) - - def missing(self, state_names: ty.Iterable[str]) -> ty.List[str]: - """Return the fields that are missing from the StateIndex - - Parameters - ---------- - fields : list[str] - the fields to check for - - Returns - ------- - list[str] - the fields that are missing from the StateIndex - """ - return [f for f in state_names if f not in self.indices] - - def matches(self, other: "StateIndex") -> bool: - """Check if the indices that are present in the other StateIndex match - - Parameters - ---------- - other : StateIndex - the other StateIndex to compare against - - Returns - ------- - bool - True if all the indices in the other StateIndex match - """ - if isinstance(other, dict): - other = StateIndex(other) - if not set(self.indices).issuperset(other.indices): - raise ValueError( - f"StateIndex {self} does not contain all the indices in {other}" - ) - return all(self.indices[k] == v for k, v in other.indices.items()) - - class State: """ A class that specifies a State of all tasks. 
@@ -1314,7 +1202,7 @@ def _single_op_splits(self, op_single): keys = [op_single] return val, keys - def _get_element(self, value: ty.Any, field_name: str, ind: int): + def _get_element(self, value: ty.Any, field_name: str, ind: int) -> ty.Any: """ Extracting element of the inputs taking into account container dimension of the specific element that can be set in self.state.cont_dim. @@ -1329,6 +1217,11 @@ def _get_element(self, value: ty.Any, field_name: str, ind: int): name of the input field ind : int index of the element + + Returns + ------- + Any + specific element of the input field """ if f"{self.name}.{field_name}" in self.cont_dim: return list( diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 102afa2df0..a7b509a420 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -18,9 +18,8 @@ attrs_values, ) from pydra.utils.hash import PersistentCache -from .state import StateIndex from pydra.utils.typing import StateArray -from pydra.engine.lazy import LazyField, LazyOutField +from pydra.engine.lazy import LazyField from .audit import Audit from .core import Task from pydra.utils.messenger import AuditFlag, Messenger @@ -502,19 +501,19 @@ class NodeExecution(ty.Generic[DefType]): submitter: Submitter # List of tasks that were completed successfully - successful: dict[StateIndex, list["Task[DefType]"]] + successful: dict[int, list["Task[DefType]"]] # List of tasks that failed - errored: dict[StateIndex, "Task[DefType]"] + errored: dict[int, "Task[DefType]"] # List of tasks that couldn't be run due to upstream errors - unrunnable: dict[StateIndex, list["Task[DefType]"]] + unrunnable: dict[int, list["Task[DefType]"]] # List of tasks that are queued - queued: dict[StateIndex, "Task[DefType]"] + queued: dict[int, "Task[DefType]"] # List of tasks that are queued - running: dict[StateIndex, tuple["Task[DefType]", datetime]] + running: dict[int, tuple["Task[DefType]", datetime]] # List of tasks that are blocked on other tasks to complete before they can be run - blocked: dict[StateIndex, "Task[DefType]"] | None + blocked: dict[int, "Task[DefType]"] | None - _tasks: dict[StateIndex | None, "Task[DefType]"] | None + _tasks: dict[int | None, "Task[DefType]"] | None workflow: "Workflow" @@ -538,9 +537,12 @@ def __init__( self.running = {} # Not used in logic, but may be useful for progress tracking self.unrunnable = defaultdict(list) # Prepare the state to be run - if self.state: + if node.state: + self.state = deepcopy(node.state) self.state.prepare_states(self.node.state_values) self.state.prepare_inputs() + else: + self.state = None self.state_names = self.node.state.names if self.node.state else [] self.workflow = workflow self.graph = None @@ -561,51 +563,35 @@ def inputs(self) -> "Node.Inputs": def _definition(self) -> "Node": return self.node._definition - @property - def state(self) -> "State": - return self.node.state - @property def tasks(self) -> ty.Iterable["Task[DefType]"]: if self._tasks is None: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def translate_index(self, index: StateIndex, lf: LazyOutField): - state_key = f"{lf._node.name}.{lf._field}" - try: - upstream_state = self.state.inner_inputs[state_key] - except KeyError: - state_index = StateIndex(index) - else: - state_index = StateIndex( - zip( - upstream_state.keys_final, - upstream_state.ind_l_final[index[state_key]], - ) - ) - return state_index - - def matching_jobs(self, index: StateIndex = StateIndex()) -> "StateArray[Task]": + def 
matching_jobs(self, index: int | None = None) -> "StateArray[Task]": """Get the jobs that match a given state index. Parameters ---------- - index : StateIndex, optional - The state index of the task to get, by default StateIndex() + index : int, optional + The index of the state of the task to get, by default None + + Returns + ------- + matching : StateArray[Task] + The tasks that match the given index """ matching = StateArray() if self.tasks: - task_index = next(iter(self._tasks)) if self._tasks else StateIndex() - if len(task_index) > len(index): + try: + matching.append(self._tasks[index]) + except KeyError: # Select matching tasks and return them in nested state-array objects for ind, task in self._tasks.items(): if ind.matches(index): matching.append(task) - else: - matching.append( - self._tasks[index.subset(task_index)] - ) # Return a single task + return matching @property @@ -684,7 +670,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: def _resolve_lazy_inputs( self, task_def: "TaskDef", - state_index: "StateIndex | None" = None, + state_index: int | None = None, ) -> "TaskDef": """Resolves lazy fields in the task definition by replacing them with their actual values calculated by upstream jobs. @@ -693,7 +679,7 @@ def _resolve_lazy_inputs( ---------- task_def : TaskDef The definition to resolve the lazy fields of - state_index : StateIndex, optional + state_index : int, optional The state index for the workflow, by default None Returns @@ -709,7 +695,7 @@ def _resolve_lazy_inputs( ) return attrs.evolve(task_def, **resolved) - def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: + def _split_definition(self) -> dict[int, "TaskDef[OutputType]"]: """Split the definition into the different states it will be run over Parameters @@ -720,27 +706,25 @@ def _split_definition(self) -> dict[StateIndex, "TaskDef[OutputType]"]: # TODO: doesn't work properly for more cmplicated wf (check if still an issue) if not self.node.state: return {None: self.node._definition} - split_defs = {} - for input_ind in self.node.state.inputs_ind: + split_defs = [] + for i, input_ind in enumerate(self.node.state.inputs_ind): resolved = {} for inpt_name in set(self.node.input_names): value = getattr(self._definition, inpt_name) state_key = f"{self.node.name}.{inpt_name}" if isinstance(value, LazyField): - value = resolved[inpt_name] = value._get_value( + resolved[inpt_name] = value._get_value( workflow=self.workflow, graph=self.graph, - state_index=input_ind, + state_index=i, ) elif state_key in input_ind: resolved[inpt_name] = self.node.state._get_element( value=value, field_name=inpt_name, - ind=input_ind[state_key], + ind=i, ) - split_defs[StateIndex(input_ind)] = attrs.evolve( - self.node._definition, **resolved - ) + split_defs.append(attrs.evolve(self.node._definition, **resolved)) return split_defs def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: From b0db082e500865ffd5387658479014f224e5e743 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 11 Mar 2025 19:14:43 +1100 Subject: [PATCH 333/342] debugging shelltask --- pydra/design/base.py | 4 + pydra/design/shell.py | 39 +--------- pydra/engine/lazy.py | 6 +- pydra/engine/specs.py | 21 +++--- pydra/engine/state.py | 3 - pydra/engine/submitter.py | 38 ++++++---- pydra/engine/tests/test_shelltask.py | 108 +++++++++++++-------------- pydra/utils/typing.py | 4 + 8 files changed, 103 insertions(+), 120 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index 616a656f4c..fcc0a71474 
100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -350,6 +350,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: if not atr.help: atr.help = helps.get(atr_name, "") elif atr_name in type_hints: + if atr_name.startswith("_"): + continue if atr_name in fields_dict: fields_dict[atr_name].type = type_hints[atr_name] elif auto_attribs: @@ -361,6 +363,8 @@ def get_fields(klass, field_type, auto_attribs, helps) -> dict[str, Field]: ) if auto_attribs: for atr_name, type_ in type_hints.items(): + if atr_name.startswith("_"): + continue if atr_name not in list(fields_dict) + ["Task", "Outputs"]: fields_dict[atr_name] = field_type( name=atr_name, type=type_, help=helps.get(atr_name, "") diff --git a/pydra/design/shell.py b/pydra/design/shell.py index 0ac67e3201..098a87778d 100644 --- a/pydra/design/shell.py +++ b/pydra/design/shell.py @@ -28,7 +28,6 @@ MultiInputObj, TypeParser, is_optional, - optional_type, ) if ty.TYPE_CHECKING: @@ -85,7 +84,7 @@ class arg(Arg): If nothing is provided the field will be inserted between all fields with nonnegative positions and fields with negative positions. sep: str, optional - A separator if a list is provided as a value. + A separator if a sequence type is provided as a value, by default " ". container_path: bool, optional If True a path will be consider as a path inside the container (and not as a local path, by default it is False @@ -99,45 +98,11 @@ class arg(Arg): argstr: str | None = "" position: int | None = None - sep: str | None = attrs.field() + sep: str = " " allowed_values: list | None = None container_path: bool = False # IS THIS STILL USED?? formatter: ty.Callable | None = None - @sep.default - def _sep_default(self): - return " " if self.type is tuple or ty.get_origin(self.type) is tuple else None - - @sep.validator - def _validate_sep(self, _, sep): - if self.type is MultiInputObj: - tp = ty.Any - elif ty.get_origin(self.type) is MultiInputObj: - tp = ty.get_args(self.type)[0] - else: - tp = self.type - if is_optional(tp): - tp = optional_type(tp) - if tp is ty.Any: - return - origin = ty.get_origin(tp) or tp - - if ( - inspect.isclass(origin) - and issubclass(origin, ty.Sequence) - and tp is not str - ): - if sep is None and not self.readonly: - raise ValueError( - f"A value to 'sep' must be provided when type is iterable {tp} " - f"for field {self.name!r}" - ) - elif sep is not None: - raise ValueError( - f"sep ({sep!r}) can only be provided when type is iterable {tp} " - f"for field {self.name!r}" - ) - @attrs.define(kw_only=True) class out(Out): diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 45c528dd77..ff9cc063d1 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -152,7 +152,7 @@ def _get_value( the resolved value of the lazy-field """ - jobs = graph.node(self._node.name).matching_jobs(state_index) + jobs = graph.node(self._node.name).get_jobs(state_index) def retrieve_from_job(job: "Task[DefType]") -> ty.Any: if job.errored: @@ -184,7 +184,9 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any: val = self._apply_cast(val) return val - if not self._node.state or not self._node.state.depth(before_combine=True): + if not isinstance(jobs, StateArray): + return retrieve_from_job(jobs) + elif not self._node.state or not self._node.state.depth(before_combine=True): assert len(jobs) == 1 return retrieve_from_job(jobs[0]) elif not self._node.state.keys_final: # all states are combined over diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 
401061e317..b67e82c2ab 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1089,6 +1089,7 @@ def _command_args(self, values: dict[str, ty.Any]) -> list[str]: self._check_resolved() self._check_rules() # Drop none/empty values and optional path fields that are set to false + values = copy(values) # Create a copy so we can drop items from the dictionary for field in list_fields(self): fld_value = values[field.name] if fld_value is None or (is_multi_input(field.type) and fld_value == []): @@ -1185,7 +1186,7 @@ def _command_pos_args( call_args_val = {} for argnm in call_args.args: if argnm == "field": - call_args_val[argnm] = value + call_args_val[argnm] = field elif argnm == "inputs": call_args_val[argnm] = values else: @@ -1223,31 +1224,31 @@ def _format_arg(self, field: shell.arg, values: dict[str, ty.Any]) -> list[str]: and isinstance(value, ty.Iterable) and not isinstance(value, (str, bytes)) ): - field.argstr = field.argstr.replace("...", "") + argstr = field.argstr.replace("...", "") # if argstr has a more complex form, with "{input_field}" - if "{" in field.argstr and "}" in field.argstr: + if "{" in argstr and "}" in argstr: argstr_formatted_l = [] for val in value: split_values = copy(values) split_values[field.name] = val - argstr_f = argstr_formatting(field.argstr, split_values) + argstr_f = argstr_formatting(argstr, split_values) argstr_formatted_l.append(f" {argstr_f}") - cmd_el_str = field.sep.join(argstr_formatted_l) + cmd_el_str = " ".join(argstr_formatted_l) else: # argstr has a simple form, e.g. "-f", or "--f" - cmd_el_str = field.sep.join([f" {field.argstr} {val}" for val in value]) + cmd_el_str = " ".join([f" {argstr} {val}" for val in value]) else: # in case there are ... when input is not a list - field.argstr = field.argstr.replace("...", "") + argstr = field.argstr.replace("...", "") if isinstance(value, ty.Iterable) and not isinstance(value, (str, bytes)): cmd_el_str = field.sep.join([str(val) for val in value]) value = cmd_el_str # if argstr has a more complex form, with "{input_field}" - if "{" in field.argstr and "}" in field.argstr: - cmd_el_str = field.argstr.replace(f"{{{field.name}}}", str(value)) + if "{" in argstr and "}" in argstr: + cmd_el_str = argstr.replace(f"{{{field.name}}}", str(value)) cmd_el_str = argstr_formatting(cmd_el_str, values) else: # argstr has a simple form, e.g. 
"-f", or "--f" if value: - cmd_el_str = f"{field.argstr} {value}" + cmd_el_str = f"{argstr} {value}" else: cmd_el_str = "" return split_cmd(cmd_el_str) diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 20b1d0e7b3..b186a60097 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -879,9 +879,6 @@ def prepare_states( if self.other_states: st: State for nm, (st, _) in self.other_states.items(): - # I think now this if is never used - if not hasattr(st, "states_ind"): - st.prepare_states(self.inputs, cont_dim=cont_dim) self.inputs.update(st.inputs) self.cont_dim.update(st.cont_dim) diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index a7b509a420..ae7556f792 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -538,7 +538,7 @@ def __init__( self.unrunnable = defaultdict(list) # Prepare the state to be run if node.state: - self.state = deepcopy(node.state) + self.state = node.state self.state.prepare_states(self.node.state_values) self.state.prepare_inputs() else: @@ -564,34 +564,43 @@ def _definition(self) -> "Node": return self.node._definition @property - def tasks(self) -> ty.Iterable["Task[DefType]"]: + def tasks(self) -> ty.Generator["Task[DefType]", None, None]: if self._tasks is None: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def matching_jobs(self, index: int | None = None) -> "StateArray[Task]": + def get_jobs( + self, index: int | None = None, as_array: bool = False + ) -> "Task | StateArray[Task]": """Get the jobs that match a given state index. Parameters ---------- index : int, optional The index of the state of the task to get, by default None + as_array : bool, optional + Whether to return the tasks in a state-array object, by default if the index + matches Returns ------- - matching : StateArray[Task] - The tasks that match the given index + matching : Task | StateArray[Task] + The task or tasks that match the given index """ matching = StateArray() if self.tasks: try: - matching.append(self._tasks[index]) + task = self._tasks[index] except KeyError: + if index is None: + return StateArray(self._tasks.values()) # Select matching tasks and return them in nested state-array objects for ind, task in self._tasks.items(): - if ind.matches(index): - matching.append(task) - + matching.append(task) + else: + if not as_array: + return task + matching.append(task) return matching @property @@ -657,7 +666,7 @@ def _generate_tasks(self) -> ty.Iterable["Task[DefType]"]: name=self.node.name, ) else: - for index, split_defn in self._split_definition().items(): + for index, split_defn in enumerate(self._split_definition()): yield Task( definition=split_defn, submitter=self.submitter, @@ -707,7 +716,7 @@ def _split_definition(self) -> dict[int, "TaskDef[OutputType]"]: if not self.node.state: return {None: self.node._definition} split_defs = [] - for i, input_ind in enumerate(self.node.state.inputs_ind): + for input_ind in self.node.state.inputs_ind: resolved = {} for inpt_name in set(self.node.input_names): value = getattr(self._definition, inpt_name) @@ -716,13 +725,13 @@ def _split_definition(self) -> dict[int, "TaskDef[OutputType]"]: resolved[inpt_name] = value._get_value( workflow=self.workflow, graph=self.graph, - state_index=i, + state_index=input_ind[state_key], ) elif state_key in input_ind: resolved[inpt_name] = self.node.state._get_element( value=value, field_name=inpt_name, - ind=i, + ind=input_ind[state_key], ) split_defs.append(attrs.evolve(self.node._definition, 
**resolved)) return split_defs @@ -754,7 +763,8 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: pred: NodeExecution is_runnable = True for pred in graph.predecessors[self.node.name]: - pred_inds = [j.state_index for j in pred.matching_jobs(index)] + pred_jobs: StateArray[Task] = pred.get_jobs(index, as_array=True) + pred_inds = [j.state_index for j in pred_jobs] if not all(i in pred.successful for i in pred_inds): is_runnable = False blocked = True diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 9f8afa7427..cf0c365f67 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -5,6 +5,7 @@ from pathlib import Path import re import stat +import attrs from pydra.engine.submitter import Submitter from pydra.design import shell, workflow, python from pydra.engine.specs import ( @@ -185,18 +186,9 @@ def test_shell_cmd_6(plugin, tmp_path): assert shelly.executable == ["echo", ["echo", "-n"]] assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - # assert shelly.cmdline == [ - # "echo nipype", - # "echo pydra", - # "echo -n nipype", - # "echo -n pydra", - # ] outputs = shelly(cache_dir=tmp_path, plugin="debug") - assert outputs.stdout[0] == "nipype\n" - assert outputs.stdout[1] == "pydra\n" - assert outputs.stdout[2] == "nipype" - assert outputs.stdout[3] == "pydra" + assert outputs.stdout == ["nipype\n", "pydra\n", "nipype", "pydra"] assert ( outputs.return_code[0] @@ -919,7 +911,7 @@ def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file_orig: File = shell.arg( - position=2, + position=1, help="new file", argstr="", ) @@ -960,7 +952,7 @@ def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file_orig: File = shell.arg( - position=2, + position=1, help="new file", argstr="", ) @@ -996,7 +988,7 @@ def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file_orig: File = shell.arg( - position=2, + position=1, help="new file", argstr="", ) @@ -1035,7 +1027,7 @@ def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file_orig: File = shell.arg( - position=2, + position=1, help="new file", argstr="", ) @@ -1073,7 +1065,7 @@ def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file_orig: File = shell.arg( - position=2, + position=1, help="new file", argstr="", ) @@ -1160,6 +1152,9 @@ class Shelly(ShellDef["Shelly.Outputs"]): help="a file", ) + class Outputs(ShellOutputs): + pass + with pytest.raises(FileNotFoundError): Shelly(executable=cmd_exec, files=file_2) @@ -1256,27 +1251,37 @@ class Outputs(ShellOutputs): assert fspath.parent.parent == tmp_path -def test_shell_cmd_inputspec_with_iterable(tmp_path): +def test_shell_cmd_inputspec_with_iterable(): """Test formatting of argstr with different iterable types.""" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = "test" - iterable_1: ty.Iterable[int] = shell.arg( + iterable_1: list[int] = shell.arg( help="iterable input 1", argstr="--in1", + sep=" ", ) - iterable_2: ty.Iterable[str] = shell.arg( + iterable_2: set[str] = shell.arg( help="iterable input 2", - argstr="--in2...", + argstr="--in2", + sep=" ", + ) + iterable_3: tuple[float, ...] 
= shell.arg( + help="iterable input 3", + argstr="--in3...", ) + class Outputs(ShellOutputs): + pass + task = Shelly() for iterable_type in (list, tuple): task.iterable_1 = iterable_type(range(3)) - task.iterable_2 = iterable_type(["bar", "foo"]) - assert task.cmdline == "test --in1 0 1 2 --in2 bar --in2 foo" + task.iterable_2 = iterable_type(["foo"]) + task.iterable_3 = iterable_type([1, 0]) + assert task.cmdline == "test --in1 0 1 2 --in2 foo --in3 1.0 --in3 0.0" @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1804,14 +1809,19 @@ def test_wf_shell_cmd_state_1(plugin, tmp_path): @shell.define class Shelly1(ShellDef["Shelly1.Outputs"]): + executable = "shelly1" + + arg: str = shell.arg(argstr=None) + class Outputs(ShellOutputs): file: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) @shell.define class Shelly2(ShellDef["Shelly2.Outputs"]): + executable = "shelly2" orig_file: str = shell.arg( position=1, help="output file", @@ -1827,12 +1837,12 @@ class Outputs(ShellOutputs): ) @workflow.define(outputs=["touch_file", "out1", "cp_file", "out2"]) - def Workflow(cmd1, cmd2, args): + def Workflow(cmd1, cmd2, arg): shelly1 = workflow.add( Shelly1( executable=cmd1, - additional_args=args, + arg=arg, ) ) shelly2 = workflow.add( @@ -1844,9 +1854,7 @@ def Workflow(cmd1, cmd2, args): return shelly1.file, shelly1.stdout, shelly2.out_file, shelly2.stdout - wf = Workflow(cmd1="touch", cmd2="cp").split( - args=[File.mock("newfile_1.txt"), File.mock("newfile_2.txt")] - ) + wf = Workflow(cmd1="touch", cmd2="cp").split(arg=["newfile_1.txt", "newfile_2.txt"]) with Submitter(plugin="debug", cache_dir=tmp_path) as sub: res = sub(wf) @@ -3356,8 +3364,8 @@ class Outputs(ShellOutputs): shelly = Shelly(out_name=["test_1", "test_2"]) outputs = shelly(cache_dir=tmp_path) - # checking if the outputs are Nothing - assert outputs.out_list == [File(next(tmp_path.iterdir()) / "test_1_real.nii")] + # checking if the outputs is None + assert outputs.out_list is None def test_shellspec_formatter_1(tmp_path): @@ -3372,23 +3380,26 @@ def make_shelly(formatter): class Shelly(ShellDef["Shelly.Outputs"]): executable = "exec" in1: str = shell.arg( - help=""" - just a dummy name - """, + argstr=None, + help="""just a dummy name""", ) in2: str = shell.arg( - help=""" - just a dummy name - """, + argstr=None, + help="""just a dummy name""", ) + together: ty.List = shell.arg( - help=""" - combines in1 and in2 into a list - """, + default=attrs.Factory(list), + help="""combines in1 and in2 into a list""", # When providing a formatter all other metadata options are discarded. 
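# A sketch of what such a formatter can look like (hypothetical name; the callable's
# argument names must match input field names, or the special names "field"/"inputs",
# and the string it returns is spliced into the command line, as asserted below):
#
#     def formatter_together(in1: str, in2: str) -> str:
#         return f"-t [{in1}, {in2}]"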
formatter=formatter, ) + class Outputs(ShellOutputs): + pass + + return Shelly + Shelly = make_shelly(formatter=formatter_1) shelly = Shelly(in1="i1", in2="i2") assert shelly.cmdline == "exec -t [i1, i2]" @@ -3417,17 +3428,17 @@ def formatter_3(in1, in3): with pytest.raises(Exception) as excinfo: shelly.cmdline assert ( - "arguments of the formatter function from together has to be in inputs or be field or output_dir, but in3 is used" + "arguments of the formatter function from together has to be in inputs or be field, but in3 is used" == str(excinfo.value) ) # checking if field value is accessible when None - def formatter_5(field): - assert field == "-t test" + def formatter_4(field): + assert isinstance(field, shell.arg) # formatter must return a string - return field + return "-t test" - Shelly = make_shelly(formatter_5) + Shelly = make_shelly(formatter_4) shelly = Shelly( in1="i1", @@ -3436,17 +3447,6 @@ def formatter_5(field): ) assert shelly.cmdline == "exec -t test" - # checking if field value is accessible when None - def formatter_4(field): - assert field is None - # formatter must return a string - return "" - - Shelly = make_shelly(formatter_4) - - shelly = Shelly(in1="i1", in2="i2") - assert shelly.cmdline == "exec" - def test_shellspec_formatter_splitter_2(tmp_path): """test the input callable 'formatter' when a splitter is used on an argument of the formatter.""" diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index f757c1a23e..c95ad5f6cd 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -1,6 +1,7 @@ import itertools import inspect from pathlib import Path +import collections.abc import os from copy import copy import sys @@ -120,6 +121,8 @@ class TypeParser(ty.Generic[T]): COERCIBLE_DEFAULT: ty.Tuple[ty.Tuple[type, type], ...] = ( ( (ty.Sequence, ty.Sequence), + (ty.Sequence, collections.abc.Set), + (collections.abc.Set, ty.Sequence), (ty.Mapping, ty.Mapping), (Path, os.PathLike), (str, os.PathLike), @@ -266,6 +269,7 @@ def __call__(self, obj: ty.Any) -> T: f"Mandatory field{self.label_str} of type {self.tp} was not " "provided a value (i.e. 
a value that wasn't None) " ) from None + self.coerce(obj) raise TypeError( f"Incorrect type for field{self.label_str}: {obj!r} is not of type " f"{self.tp} (and cannot be coerced to it)" From 3a9e35feeb02094f0a6dca1b83d8fe7155d68903 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 14:25:24 +1100 Subject: [PATCH 334/342] test_shelltask passes --- pydra/design/base.py | 47 +++++- pydra/engine/specs.py | 2 +- pydra/engine/tests/conftest.py | 7 +- pydra/engine/tests/test_shelltask.py | 244 ++++++++++++++------------- pydra/utils/hash.py | 17 +- 5 files changed, 182 insertions(+), 135 deletions(-) diff --git a/pydra/design/base.py b/pydra/design/base.py index fcc0a71474..b1b86797b3 100644 --- a/pydra/design/base.py +++ b/pydra/design/base.py @@ -93,7 +93,7 @@ def satisfied(self, inputs: "TaskDef") -> bool: """Check if the requirement is satisfied by the inputs""" value = getattr(inputs, self.name) field = {f.name: f for f in list_fields(inputs)}[self.name] - if value is attrs.NOTHING or field.type is bool and value is False: + if value is None or field.type is bool and value is False: return False if self.allowed_values is None: return True @@ -105,7 +105,7 @@ def parse(cls, value: ty.Any) -> Self: return value elif isinstance(value, str): return Requirement(value) - else: + elif isinstance(value, tuple): name, allowed_values = value if isinstance(allowed_values, str) or not isinstance( allowed_values, ty.Collection @@ -114,6 +114,11 @@ def parse(cls, value: ty.Any) -> Self: f"allowed_values must be a collection of strings, not {allowed_values}" ) return Requirement(name, allowed_values) + else: + raise ValueError( + f"Requirements must be a input field name, a tuple of an input field " + f"name and allowed values or a Requirement object, not {value!r}" + ) def __str__(self): if not self.allowed_values: @@ -126,8 +131,21 @@ def requirements_converter(value: ty.Any) -> list[Requirement]: if isinstance(value, Requirement): return [value] elif isinstance(value, (str, tuple)): - return [Requirement.parse(value)] - return [Requirement.parse(v) for v in value] + try: + return [Requirement.parse(value)] + except ValueError as e: + e.add_note( + f"Parsing requirements specification {value!r} as a single requirement" + ) + raise e + try: + return [Requirement.parse(v) for v in value] + except ValueError as e: + e.add_note( + f"Parsing requirements specification {value!r} as a set of concurrent " + "requirements (i.e. logical AND)" + ) + raise e @attrs.define @@ -166,8 +184,21 @@ def requires_converter( ) -> list[RequirementSet]: """Ensure the requires field is a tuple of tuples""" if isinstance(value, (str, tuple, Requirement)): - return [RequirementSet(value)] - return [RequirementSet(v) for v in value] + try: + return [RequirementSet(value)] + except ValueError as e: + e.add_note( + f"Parsing requirements set specification {value!r} as a single requirement set" + ) + raise e + try: + return [RequirementSet(v) for v in value] + except ValueError as e: + e.add_note( + f"Parsing requirements set specification {value!r} as a set of alternative " + "requirements (i.e. logical OR)" + ) + raise e @attrs.define(kw_only=True) @@ -226,8 +257,8 @@ def mandatory(self): def _requires_validator(self, _, value): if value and self.type not in (ty.Any, bool) and not is_optional(self.type): raise ValueError( - f"Fields with requirements must be of boolean or optional type, " - f"not type {self.type} ({self!r})" + f"Fields with requirements must be of optional type (i.e. 
in union " + f"with None) or boolean, not type {self.type} ({self!r})" ) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index b67e82c2ab..5bc1542b3e 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -591,7 +591,7 @@ def _check_arg_refs( type_ = inputs[field_name].type if type_ not in (ty.Any, bool) and not is_optional(type_): raise ValueError( - f"Fields included in a 'xor' ({field.name!r}) must be of boolean " + f"Fields included in a 'xor' ({field_name!r}) must be of boolean " f"or optional types, not type {type_}" ) diff --git a/pydra/engine/tests/conftest.py b/pydra/engine/tests/conftest.py index b7ecfbb8e9..642944cf5c 100644 --- a/pydra/engine/tests/conftest.py +++ b/pydra/engine/tests/conftest.py @@ -1,3 +1,4 @@ +from pathlib import Path import pytest @@ -8,9 +9,9 @@ @pytest.fixture(scope="package") -def data_tests_dir(): - test_nii = importlib_resources.files("pydra").joinpath( +def data_tests_dir() -> Path: + data_dir = importlib_resources.files("pydra").joinpath( "engine", "tests", "data_tests" ) - with importlib_resources.as_file(test_nii) as path: + with importlib_resources.as_file(data_dir) as path: yield path diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index cf0c365f67..897e955e80 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -673,6 +673,7 @@ class Shelly(ShellDef["Shelly.Outputs"]): requires=["opt_l"], ) opt_l: bool = shell.arg( + default=False, position=1, help="opt l", argstr="-l", @@ -682,9 +683,9 @@ class Outputs(ShellOutputs): pass shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) - with pytest.raises(Exception) as excinfo: - shelly(cache_dir=tmp_path) - assert "requires" in str(excinfo.value) + + with pytest.raises(ValueError, match="'opt_t' requires \['opt_l'\]"): + shelly() @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -1426,7 +1427,7 @@ class Outputs(ShellOutputs): pass with pytest.raises(TypeError): - Shelly() + Shelly(text="hello") def test_shell_cmd_inputspec_typeval_2(tmp_path): @@ -2151,10 +2152,10 @@ class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd class Outputs(ShellOutputs): - newfile: File = shell.outarg(callable=gather_output) + newfile: File = shell.out(callable=gather_output) shelly = Shelly() - with pytest.raises(AttributeError, match="ble"): + with pytest.raises(KeyError, match="ble"): shelly(cache_dir=tmp_path) @@ -2227,21 +2228,23 @@ def test_shell_cmd_outputspec_6a(tmp_path): (using shorter syntax) """ cmd = "touch" - args = "newfile_tmp.txt" + arg = "newfile_tmp.txt" @shell.define class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd + arg: str = shell.arg(argstr=None) + class Outputs(ShellOutputs): out1: File = shell.outarg( - path_template="{args}", + path_template="{arg}", help="output file", ) - shelly = Shelly(additional_args=args) + shelly = Shelly(arg=arg) outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" @@ -2389,22 +2392,18 @@ def test_shell_cmd_outputspec_8b_error(tmp_path): customised output_spec, adding Int to the output, requiring a function to collect output """ - cmd = "echo" - args = ["newfile_1.txt", "newfile_2.txt"] - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - - executable = cmd - - class Outputs(ShellOutputs): + with pytest.raises( + ValueError, + match="A shell output field must have either a callable or a path_template", + ): - out: int = shell.arg(help="output file", value="val") + @shell.define + class 
Shelly(ShellDef["Shelly.Outputs"]): + executable = "echo" - shelly = Shelly().split("additional_args", args=args) - with pytest.raises(Exception) as e: - shelly(cache_dir=tmp_path) - assert "has to have a callable" in str(e.value) + class Outputs(ShellOutputs): + out: int = shell.out(help="output file") @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -2583,12 +2582,16 @@ def test_shell_cmd_inputspec_outputspec_1a(tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + file2: str | None = shell.arg( + default=None, help="2nd creadted file", argstr="", position=2 + ) class Outputs(ShellOutputs): - newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") - newfile2: File = shell.outarg(path_template="{file2}", help="newfile 2") + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 2" + ) shelly = Shelly( executable=cmd, @@ -2616,20 +2619,18 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", requires=["file1"], ) - newfile2: File = shell.outarg( - path_template="{file2}", + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 1", requires=["file1", "file2"], ) - shelly = Shelly( - file1=File.mock("new_file_1.txt"), file2=File.mock("new_file_2.txt") - ) + shelly = Shelly(file1="new_file_1.txt", file2="new_file_2.txt") assert get_output_names(shelly) == [ "newfile1", "newfile2", @@ -2654,17 +2655,19 @@ def test_shell_cmd_inputspec_outputspec_2a(tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) + file2: str | None = shell.arg( + default=None, help="2nd creadted file", argstr="", position=2 + ) class Outputs(ShellOutputs): - newfile1: File = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", requires=["file1"], ) - newfile2: File = shell.outarg( - path_template="{file2}", + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 1", requires=["file1", "file2"], ) @@ -2682,12 +2685,6 @@ class Outputs(ShellOutputs): ] outputs = shelly(cache_dir=tmp_path) - assert get_output_names(shelly) == [ - "newfile1", - "return_code", - "stderr", - "stdout", - ] assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() @@ -2710,18 +2707,16 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") - newfile2: File = shell.outarg( - path_template="{file2}", + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 1", requires=["file1", "additional_inp"], ) - shelly = Shelly( - executable=cmd, - ) - shelly.file1 = File.mock("new_file_1.txt") - shelly.file2 = File.mock("new_file_2.txt") + shelly = Shelly(executable=cmd) + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" shelly.additional_inp = 2 outputs = 
shelly(cache_dir=tmp_path) @@ -2742,14 +2737,14 @@ def test_shell_cmd_inputspec_outputspec_3a(tmp_path): class Shelly(ShellDef["Shelly.Outputs"]): executable = cmd file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - file2: str = shell.arg(help="2nd creadted file", argstr="", position=2) - additional_inp: str = shell.arg(help="additional inp") + file2: str | None = shell.arg(help="2nd creadted file", argstr="", position=2) + additional_inp: str | None = shell.arg(default=None, help="additional inp") class Outputs(ShellOutputs): - newfile1: File = shell.outarg(path_template="{file1}", help="newfile 1") - newfile2: File = shell.outarg( - path_template="{file2}", + newfile1: File = shell.out(callable=lambda file1: file1, help="newfile 1") + newfile2: File | None = shell.out( + callable=lambda file2: file2, help="newfile 1", requires=["file1", "additional_inp"], ) @@ -2757,8 +2752,8 @@ class Outputs(ShellOutputs): shelly = Shelly( executable=cmd, ) - shelly.file1 = File.mock("new_file_1.txt") - shelly.file2 = File.mock("new_file_2.txt") + shelly.file1 = "new_file_1.txt" + shelly.file2 = "new_file_2.txt" assert get_output_names(shelly) == [ "newfile1", "newfile2", @@ -2767,13 +2762,8 @@ class Outputs(ShellOutputs): "stdout", ] + shelly.file2 = None outputs = shelly(cache_dir=tmp_path) - assert get_output_names(shelly) == [ - "newfile1", - "return_code", - "stderr", - "stdout", - ] assert outputs.stdout == "" assert outputs.newfile1.fspath.exists() @@ -2796,8 +2786,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File | None = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", requires=["file1", ("additional_inp", [2, 3])], ) @@ -2809,7 +2799,7 @@ class Outputs(ShellOutputs): shelly.additional_inp = 2 outputs = shelly(cache_dir=tmp_path) - assert get_output_names(shelly)(outputs.stdout, outputs.stderr) == [ + assert get_output_names(shelly) == [ "newfile1", "return_code", "stderr", @@ -2824,7 +2814,7 @@ def test_shell_cmd_inputspec_outputspec_4a(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires filed adding one additional input to the requires together with a list of the allowed values, - the input is set to a value that is not in the list, so output is NOTHING + the input is set to a value that is not in the list, so output is None """ cmd = ["touch", "newfile_tmp.txt"] @@ -2836,10 +2826,10 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File | None = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", - requires=["file1", ("additional_inp", [2, 3])], + requires=("file1", ("additional_inp", [2, 3])), ) shelly = Shelly(executable=cmd) @@ -2869,8 +2859,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... requires=[ @@ -2907,8 +2897,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... 
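# For reference, the requires forms exercised in these outputspec tests (as parsed
# by the Requirement/RequirementSet converters added to pydra/design/base.py above):
#
#     requires=("file1", ("additional_inp", [2, 3]))
#         -> only collected when file1 is set and additional_inp is 2 or 3
#     requires=[["file1", "additional_inp_A"], ["file1", "additional_inp_B"]]
#         -> a list of lists is an OR across requirement sets; every entry of an
#            inner list must be satisfied for that alternative to apply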
requires=[ @@ -2945,8 +2935,8 @@ class Shelly(ShellDef["Shelly.Outputs"]): class Outputs(ShellOutputs): - newfile1: File | None = shell.outarg( - path_template="{file1}", + newfile1: File | None = shell.out( + callable=lambda file1: file1, help="newfile 1", # requires is a list of list so it's treated as el[0] OR el[1] OR... requires=[ @@ -2955,10 +2945,8 @@ class Outputs(ShellOutputs): ], ) - shelly = Shelly( - executable=cmd, - ) - shelly.file1 = File.mock("new_file_1.txt") + shelly = Shelly(executable=cmd) + shelly.file1 = "new_file_1.txt" outputs = shelly(cache_dir=tmp_path) assert outputs.stdout == "" @@ -2966,34 +2954,32 @@ class Outputs(ShellOutputs): assert outputs.newfile1 is None +@pytest.mark.xfail( + reason="I'm not sure why this requirements specification should fail" +) def test_shell_cmd_inputspec_outputspec_6_except(tmp_path): """ customised input_spec and output_spec, output_spec uses input_spec fields in the requires requires has invalid syntax - exception is raised """ - cmd = ["touch", "newfile_tmp.txt"] - @shell.define - class Shelly(ShellDef["Shelly.Outputs"]): - executable = cmd - file1: str = shell.arg(help="1st creadted file", argstr="", position=1) - additional_inp_A: str = shell.arg(help="additional inp A") + with pytest.raises(Exception, match="requires field can be"): - class Outputs(ShellOutputs): - newfile1: File = shell.outarg( - path_template="{file1}", - help="newfile 1", - # requires has invalid syntax - requires=[["file1", "additional_inp_A"], "file1"], + @shell.define + class Shelly(ShellDef["Shelly.Outputs"]): + executable = "touch" + file1: str = shell.arg(help="1st creadted file", argstr="", position=1) + additional_inp_A: str | None = shell.arg( + default=None, help="additional inp A" ) - shelly = Shelly( - executable=cmd, - ) - shelly.file1 = File.mock("new_file_1.txt") - - with pytest.raises(Exception, match="requires field can be"): - shelly(cache_dir=tmp_path) + class Outputs(ShellOutputs): + newfile1: File | None = shell.out( + callable=lambda file1: file1, + help="newfile 1", + # requires has invalid syntax + requires=[["file1", "additional_inp_A"], "file1"], + ) def no_fsl(): @@ -3018,9 +3004,10 @@ def change_name(file): "remove_eyes", "surfaces", "t2_guided", + None, ] ) - class Shelly(ShellDef["Shelly.Outputs"]): + class Bet(ShellDef["Bet.Outputs"]): executable = "bet" in_file: File = shell.arg( help="input file to skull strip", @@ -3029,69 +3016,85 @@ class Shelly(ShellDef["Shelly.Outputs"]): ) outline: bool = shell.arg( + default=False, help="create surface outline image", argstr="-o", ) mask: bool = shell.arg( + default=False, help="create binary mask image", argstr="-m", ) skull: bool = shell.arg( + default=False, help="create skull image", argstr="-s", ) no_output: bool = shell.arg( + default=False, help="Don't generate segmented output", argstr="-n", ) - frac: float = shell.arg( + frac: float | None = shell.arg( + default=None, help="fractional intensity threshold", argstr="-f", ) - vertical_gradient: float = shell.arg( + vertical_gradient: float | None = shell.arg( + default=None, help="vertical gradient in fractional intensity threshold (-1, 1)", argstr="-g", allowed_values={"min_val": -1, "max_val": 1}, ) - radius: int = shell.arg(argstr="-r", help="head radius") - center: ty.List[int] = shell.arg( + radius: int | None = shell.arg(default=None, argstr="-r", help="head radius") + center: ty.List[int] | None = shell.arg( + default=None, help="center of gravity in voxels", argstr="-c", allowed_values={"min_value": 0, "max_value": 3}, 
) threshold: bool = shell.arg( + default=False, argstr="-t", help="apply thresholding to segmented brain image and mask", ) mesh: bool = shell.arg( + default=False, argstr="-e", help="generate a vtk mesh brain surface", ) robust: bool = shell.arg( + default=False, help="robust brain centre estimation (iterates BET several times)", argstr="-R", ) padding: bool = shell.arg( + default=False, help="improve BET if FOV is very small in Z (by temporarily padding end slices", argstr="-Z", ) remove_eyes: bool = shell.arg( + default=False, help="eye & optic nerve cleanup (can be useful in SIENA)", argstr="-S", ) surfaces: bool = shell.arg( + default=False, help="run bet2 and then betsurf to get additional skull and scalp surfaces (includes registrations)", argstr="-A", ) - t2_guided: ty.Union[File, str] = shell.arg( + t2_guided: File | str | None = shell.arg( + default=None, help="as with creating surfaces, when also feeding in non-brain-extracted T2 (includes registrations)", argstr="-A2", ) functional: bool = shell.arg( + default=False, argstr="-F", help="apply to 4D fMRI data", ) reduce_bias: bool = shell.arg( + default=False, argstr="-B", help="bias field and neck cleanup", ) @@ -3115,11 +3118,19 @@ class Outputs(ShellOutputs): in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args - shelly = Shelly(in_file=in_file) - out_file = next(tmp_path.iterdir()) / "test_brain.nii.gz" + shelly = Bet(in_file=File.mock("/path/to/nifti.nii.gz")) assert shelly.executable == "bet" - assert shelly.cmdline == f"bet {in_file} {out_file}" - # outputs = shelly(plugin="cf") + try: + orig_dir = os.getcwd() + os.chdir(tmp_path) + assert ( + shelly.cmdline == f"bet /path/to/nifti.nii.gz {tmp_path}/nifti_brain.nii.gz" + ) + finally: + os.chdir(orig_dir) + shelly = Bet(in_file=in_file) + outputs = shelly(cache_dir=tmp_path) + assert outputs.out_file.name == "test_brain.nii.gz" def test_shell_cmd_optional_output_file1(tmp_path): @@ -3140,16 +3151,16 @@ class Outputs(ShellOutputs): help="output file", ) unused: File | None = shell.outarg( - default=False, + default=None, argstr="--not-used", path_template="out.txt", help="dummy output", ) - my_cp = ShellDef() file1 = tmp_path / "file1.txt" file1.write_text("foo") - outputs = my_cp(input=file1, unused=False) + my_cp = Shelly(input=file1, unused=False) + outputs = my_cp(cache_dir=tmp_path) assert outputs.output.fspath.read_text() == "foo" @@ -3171,16 +3182,17 @@ class Outputs(ShellOutputs): help="dummy output", ) - my_cp = Shelly() file1 = tmp_path / "file1.txt" file1.write_text("foo") - outputs = my_cp(input=file1, output=True) + my_cp = Shelly(input=file1, output=True) + outputs = my_cp(cache_dir=tmp_path) assert outputs.output.fspath.read_text() == "foo" file2 = tmp_path / "file2.txt" file2.write_text("bar") + my_cp2 = Shelly(input=file2, output=False) with pytest.raises(RuntimeError): - my_cp(input=file2, output=False) + my_cp2() def test_shell_cmd_non_existing_outputs_1(tmp_path): diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 9dd8b20ecc..aab9e182cd 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -646,13 +646,16 @@ def strip_annotations(node: ast.AST): indent = re.match(r"(\s*)", src).group(1) if indent: src = re.sub(f"^{indent}", "", src, flags=re.MULTILINE) - func_ast = ast.parse(src).body[0] - strip_annotations(func_ast) - if hasattr(func_ast, "args"): - yield dump_ast(func_ast.args) - if hasattr(func_ast, "body"): - for stmt in func_ast.body: - yield dump_ast(stmt) + try: + func_ast = ast.parse(src).body[0] + 
strip_annotations(func_ast) + if hasattr(func_ast, "args"): + yield dump_ast(func_ast.args) + if hasattr(func_ast, "body"): + for stmt in func_ast.body: + yield dump_ast(stmt) + except SyntaxError: + yield src.encode() yield b")" From 3287bead06ca6e515970604479dec2a55b73832c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 14:28:21 +1100 Subject: [PATCH 335/342] reverted all plugin specifications in test_shelltask to the plugin fixture (from "debug") --- pydra/engine/tests/test_shelltask.py | 122 +++++++++++++-------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 897e955e80..4408339893 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -51,7 +51,7 @@ def test_shell_cmd_1_strip(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert Path(outputs.stdout).parent == tmp_path assert outputs.return_code == 0 assert outputs.stderr == "" @@ -65,7 +65,7 @@ def test_shell_cmd_2(plugin, results_function, tmp_path): assert shelly.cmdline == " ".join(cmd) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd[1:]) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -82,7 +82,7 @@ def test_shell_cmd_2a(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout.strip() == " ".join(cmd_args) assert outputs.return_code == 0 assert outputs.stderr == "" @@ -99,7 +99,7 @@ def test_shell_cmd_2b(plugin, results_function, tmp_path): assert shelly.executable == "echo" assert shelly.cmdline == "echo pydra" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "pydra\n" assert outputs.return_code == 0 assert outputs.stderr == "" @@ -142,7 +142,7 @@ def test_shell_cmd_4(plugin, tmp_path): assert shelly.executable == "echo" assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(plugin="debug") + outputs = shelly(plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -167,7 +167,7 @@ def test_shell_cmd_5(plugin, tmp_path): assert shelly.executable == "echo" assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) # assert shelly.cmdline == ["echo nipype", "echo pydra"] - outputs = shelly(plugin="debug") + outputs = shelly(plugin=plugin) assert outputs.stdout[0] == "nipype\n" assert outputs.stdout[1] == "pydra\n" @@ -186,7 +186,7 @@ def test_shell_cmd_6(plugin, tmp_path): assert shelly.executable == ["echo", ["echo", "-n"]] assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - outputs = shelly(cache_dir=tmp_path, plugin="debug") + outputs = shelly(cache_dir=tmp_path, plugin=plugin) assert outputs.stdout == ["nipype\n", "pydra\n", "nipype", "pydra"] @@ -226,7 +226,7 @@ def test_shell_cmd_7(plugin, tmp_path): assert shelly.executable == ["echo", 
["echo", "-n"]] assert shelly.additional_args == StateArray([["nipype"], ["pydra"]]) - outputs = shelly(plugin="debug") + outputs = shelly(plugin=plugin) assert outputs.stdout == [["nipype\n", "pydra\n"], ["nipype", "pydra"]] @@ -251,7 +251,7 @@ def StripAndListify(x: str) -> list[str]: wf = Workflow(cmd1="pwd", cmd2="ls") - with Submitter(plugin="debug", cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert "_result.pklz" in res.outputs.out @@ -289,7 +289,7 @@ class Outputs(ShellOutputs): assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n 'hello from pydra'" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "hello from pydra" @@ -326,7 +326,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.additional_args == cmd_args assert shelly.cmdline == "echo -n HELLO 'from pydra'" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO from pydra" @@ -352,7 +352,7 @@ class Outputs(ShellOutputs): shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -376,7 +376,7 @@ class Outputs(ShellOutputs): shelly = Shelly(text=hello) assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -404,7 +404,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "HELLO\n" @@ -453,7 +453,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "\n" @@ -481,7 +481,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -506,7 +506,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hello" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hello\n" @@ -534,7 +534,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec assert shelly.cmdline == "echo Hi" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "Hi\n" @@ -591,7 +591,7 @@ class Outputs(ShellOutputs): shelly = Shelly(opt_t=cmd_t) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -t" - results_function(shelly, 
plugin="debug", cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): @@ -653,7 +653,7 @@ class Outputs(ShellOutputs): shelly = Shelly(opt_t=cmd_t, opt_l=cmd_l) assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin="debug", cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) def test_shell_cmd_inputspec_6a_exception(plugin, tmp_path): @@ -723,7 +723,7 @@ class Outputs(ShellOutputs): shelly.opt_l = cmd_l assert shelly.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - results_function(shelly, plugin="debug", cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) @pytest.mark.parametrize("results_function", [run_no_submitter, run_submitter]) @@ -749,7 +749,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, arg=arg) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" out1 = outputs.out1.fspath assert out1.exists() @@ -783,7 +783,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, newfile=File.mock("newfile_tmp.txt")) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -811,7 +811,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, arg=arg) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # checking if the file is created in a good place assert outputs.out1.fspath.parent.parent == tmp_path @@ -852,7 +852,7 @@ class Outputs(ShellOutputs): time="02121010", ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -891,7 +891,7 @@ class Outputs(ShellOutputs): time="02121010", ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -929,7 +929,7 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" @@ -967,7 +967,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy.txt" @@ -1007,7 +1007,7 @@ class Outputs(ShellOutputs): file_orig=file, ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file_copy" @@ -1043,7 +1043,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, file_orig=file) 
- outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "file" @@ -1084,7 +1084,7 @@ class Outputs(ShellOutputs): file_copy="my_file_copy.txt", ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.file_copy.fspath.exists() assert outputs.file_copy.fspath.name == "my_file_copy.txt" @@ -1124,7 +1124,7 @@ class Outputs(ShellOutputs): ) assert shelly.executable == cmd_exec - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "hello from boston" @@ -1243,7 +1243,7 @@ class Outputs(ShellOutputs): number=2, ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" fspath = outputs.file_copy.fspath assert fspath.exists() @@ -1313,7 +1313,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is copied, and than it is changed in place @@ -1355,7 +1355,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd, orig_file=str(file)) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying @@ -1403,7 +1403,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1472,7 +1472,7 @@ class Outputs(ShellOutputs): shelly = Shelly().split(text=["HELLO", "hi"]) assert shelly.executable == cmd_exec - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout[0] == "HELLO\n" assert outputs.stdout[1] == "hi\n" @@ -1499,7 +1499,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd).split(arg=args) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() @@ -1536,7 +1536,7 @@ class Outputs(ShellOutputs): assert shelly.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == ["hello from pydra", "have a nice one"] @@ -1576,7 +1576,7 @@ class Outputs(ShellOutputs): ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] - outputs = 
results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(files)): assert outputs.stdout[i] == "" assert outputs.out_file[i].fspath.exists() @@ -1663,7 +1663,7 @@ def Workflow(cmd, arg): wf = Workflow(cmd="touch", arg="newfile.txt") - with Submitter(plugin="debug") as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out == "" @@ -1727,7 +1727,7 @@ def Workflow(cmd1, cmd2, arg): wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") - with Submitter(plugin="debug", cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1793,7 +1793,7 @@ def Workflow(cmd1, cmd2, arg): wf = Workflow(cmd1="touch", cmd2="cp", arg="newfile.txt") - with Submitter(plugin="debug") as sub: + with Submitter(plugin=plugin) as sub: res = sub(wf) assert res.outputs.out1 == "" @@ -1857,7 +1857,7 @@ def Workflow(cmd1, cmd2, arg): wf = Workflow(cmd1="touch", cmd2="cp").split(arg=["newfile_1.txt", "newfile_2.txt"]) - with Submitter(plugin="debug", cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) for i in range(2): @@ -1928,7 +1928,7 @@ def Workflow(cmd1, cmd2, args): args=["newfile_1.txt", "newfile_2.txt"], ) - with Submitter(plugin="debug", cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.out1 == ["", ""] @@ -1954,7 +1954,7 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): ) shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -1976,7 +1976,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -1998,7 +1998,7 @@ class Outputs(ShellOutputs): shelly = Shelly() with pytest.raises(Exception) as exinfo: - with Submitter(plugin="debug") as sub: + with Submitter(plugin=plugin) as sub: shelly(submitter=sub) assert "does not exist" in str(exinfo.value) @@ -2021,7 +2021,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.newfile.fspath.exists() @@ -2065,7 +2065,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2095,7 +2095,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2127,7 +2127,7 @@ class Outputs(ShellOutputs): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ 
-2182,7 +2182,7 @@ def gather_output(executable, output_dir): shelly = Shelly() - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" # newfile is a list assert len(outputs.newfile) == 2 @@ -2217,7 +2217,7 @@ class Outputs(ShellOutputs): arg=arg, ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.out1.fspath.exists() @@ -2291,7 +2291,7 @@ class Outputs(ShellOutputs): files_id=new_files_id, ) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) assert outputs.stdout == "" for file in outputs.new_files: assert file.fspath.exists() @@ -2340,7 +2340,7 @@ class Outputs(ShellOutputs): # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_inputspec_11 # see https://github.com/nipype/pydra/issues/671 - outputs = results_function(shelly, "serial") + outputs = results_function(shelly, cache_dir=tmp_path) assert outputs.stdout == "" assert outputs.new_files.fspath.exists() @@ -2381,7 +2381,7 @@ class Outputs(ShellOutputs): shelly = Shelly().split(additional_args=args) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index in range(2): assert outputs.out_file_index[index] == index + 1 assert outputs.stderr_field[index] == f"stderr: {outputs.stderr[index]}" @@ -2434,7 +2434,7 @@ class Outputs(ShellOutputs): shelly = Shelly(resultsDir="outdir").split(arg=args) - results_function(shelly, plugin="debug", cache_dir=tmp_path) + results_function(shelly, plugin=plugin, cache_dir=tmp_path) for index, arg_dir in enumerate(args): assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @@ -2470,7 +2470,7 @@ class Outputs(ShellOutputs): shelly = Shelly(resultsDir="test") assert get_output_names(shelly) == ["resultsDir", "return_code", "stderr", "stdout"] - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) output_dir = next(tmp_path.iterdir()) assert (output_dir / Path("test")).exists() assert get_lowest_directory(outputs.resultsDir) == get_lowest_directory( @@ -2503,7 +2503,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd).split(arg=args) - outputs = results_function(shelly, plugin="debug", cache_dir=tmp_path) + outputs = results_function(shelly, plugin=plugin, cache_dir=tmp_path) for i in range(len(args)): assert outputs.stdout[i] == "" assert outputs.out1[i].fspath.exists() @@ -2535,7 +2535,7 @@ def Workflow(cmd): wf = Workflow(cmd=cmd) - with Submitter(plugin="debug", cache_dir=tmp_path) as sub: + with Submitter(plugin=plugin, cache_dir=tmp_path) as sub: res = sub(wf) assert res.outputs.stdout == "" From 83ac1e8b67916f96cb057b0f146fd10b9b6a902b Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 16:41:19 +1100 Subject: [PATCH 336/342] modified bytes_repr_type so that the module location drops any sub-modules starting with "_" --- pydra/utils/hash.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 
aab9e182cd..0d013daacd 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -475,7 +475,10 @@ def type_location(tp: type) -> bytes: type_name = tp.__name__ except AttributeError: type_name = tp._name - return f"{klass.__module__}.{type_name}".encode() + mod_path = ".".join( + p for p in klass.__module__.split(".") if not p.startswith("_") + ) + return f"{mod_path}.{type_name}".encode() yield b"type:(" origin = ty.get_origin(klass) From 8f593fd59de06930c0d39a77618a318d89269dbf Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 16:42:31 +1100 Subject: [PATCH 337/342] reverted argstr with "..." in them to use field sep (instead of " " hard-coded). --- pydra/engine/specs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 5bc1542b3e..5752cab39c 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1233,9 +1233,9 @@ def _format_arg(self, field: shell.arg, values: dict[str, ty.Any]) -> list[str]: split_values[field.name] = val argstr_f = argstr_formatting(argstr, split_values) argstr_formatted_l.append(f" {argstr_f}") - cmd_el_str = " ".join(argstr_formatted_l) + cmd_el_str = field.sep.join(argstr_formatted_l) else: # argstr has a simple form, e.g. "-f", or "--f" - cmd_el_str = " ".join([f" {argstr} {val}" for val in value]) + cmd_el_str = field.sep.join([f" {argstr} {val}" for val in value]) else: # in case there are ... when input is not a list argstr = field.argstr.replace("...", "") From c7ad6b2c6f184e659103a6d79cda32a00a70d354 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 16:43:03 +1100 Subject: [PATCH 338/342] fixed up list of stdlib modules and is_stdlib --- pydra/utils/misc.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py index df3ae280fa..2575343fd7 100644 --- a/pydra/utils/misc.py +++ b/pydra/utils/misc.py @@ -3,6 +3,7 @@ import ast import inspect import types +import sysconfig import sys import platformdirs import builtins @@ -148,24 +149,27 @@ def get_builtin_type_names(): return set(name for name, obj in vars(builtins).items() if isinstance(obj, type)) -def in_stdlib(obj: types.FunctionType | type) -> bool: - """Check if a type is in the standard library.""" +def in_stdlib(obj: types.FunctionType | type) -> str | bool: + """Check if a type is in the standard library and return the name of the module if + so.""" module = inspect.getmodule(obj) if module is None: return False if module.__name__.startswith("builtins"): - return True + return "builtins" if module.__name__ == "types" and obj.__name__ not in dir(types): return False - return module.__name__.split(".")[-1] in STDLIB_MODULES + toplevel = module.__name__.split(".")[0] + if toplevel in STDLIB_MODULES: + return toplevel + return False def _stdlib_modules() -> frozenset[str]: """List all standard library modules.""" std_lib_modules = set(sys.builtin_module_names) - for _, modname, ispkg in pkgutil.iter_modules(): - if not ispkg: - std_lib_modules.add(modname) + std_lib_path = sysconfig.get_path("stdlib") + std_lib_modules.update(m[1] for m in pkgutil.iter_modules([std_lib_path])) return frozenset(std_lib_modules) From 5295e46fd679d867247c8c6190cffffe88c80753 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 16:43:41 +1100 Subject: [PATCH 339/342] fixed up LazyField._resolve_value to handle combine states after switching to state indices --- pydra/engine/lazy.py | 15 +++++++++------ 1 file changed, 9 
insertions(+), 6 deletions(-) diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index ff9cc063d1..1797b401d9 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -192,12 +192,15 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any: elif not self._node.state.keys_final: # all states are combined over return [retrieve_from_job(j) for j in jobs] elif self._node.state.combiner: - values = StateArray() - for ind in self._node.state.states_ind_final: - values.append( - [retrieve_from_job(j) for j in jobs if j.state_index.matches(ind)] - ) - return values + sorted_values = { + frozenset(i.items()): [] for i in self._node.state.states_ind_final + } + assert len(jobs) == len(self._node.state.inputs_ind) + for ind, job in zip(self._node.state.inputs_ind, jobs): + sorted_values[ + frozenset((key, ind[key]) for key in self._node.state.keys_final) + ].append(retrieve_from_job(job)) + return StateArray(sorted_values.values()) else: return StateArray(retrieve_from_job(j) for j in jobs) From 128c224274a7398ceef2e1f9d17afcb74931ad00 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 16:43:55 +1100 Subject: [PATCH 340/342] minor fixups --- pydra/engine/tests/test_shelltask.py | 2 +- pydra/engine/tests/test_singularity.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index 4408339893..e0d9a35add 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -684,7 +684,7 @@ class Outputs(ShellOutputs): shelly = Shelly(executable=cmd_exec, opt_t=cmd_t) - with pytest.raises(ValueError, match="'opt_t' requires \['opt_l'\]"): + with pytest.raises(ValueError, match=r"'opt_t' requires \['opt_l'\]"): shelly() diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 30a934e014..98d8366499 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -93,6 +93,7 @@ def test_singularity_2a(plugin, tmp_path): ) as sub: res = sub(singu) + assert not res.errored, "\n".join(res.errors["error message"]) assert res.outputs.stdout.strip() == " ".join(cmd_args) assert res.outputs.return_code == 0 From 54bbd470662503a9033bf677c9a14e8c9cff2017 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Wed, 12 Mar 2025 18:40:46 +1100 Subject: [PATCH 341/342] fixed all unittests outside of test_workflow that were broken in recent changes --- pydra/engine/environments.py | 54 ++++++++++++------ pydra/engine/helpers.py | 7 ++- pydra/engine/submitter.py | 4 +- pydra/engine/tests/test_helpers.py | 5 +- pydra/engine/tests/test_singularity.py | 2 +- pydra/engine/tests/test_specs.py | 12 ++-- pydra/engine/tests/test_state.py | 78 +++++++++++++++++++++++++- pydra/engine/tests/test_task.py | 17 ++---- pydra/utils/tests/test_typing.py | 12 ++-- pydra/utils/typing.py | 1 - 10 files changed, 139 insertions(+), 53 deletions(-) diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py index 03ea952efc..a13f5bf75f 100644 --- a/pydra/engine/environments.py +++ b/pydra/engine/environments.py @@ -3,10 +3,12 @@ from copy import copy from .helpers import execute from pathlib import Path +import logging from fileformats.generic import FileSet from pydra.engine.helpers import list_fields from pydra.utils.typing import TypeParser +logger = logging.getLogger("pydra") if ty.TYPE_CHECKING: from pydra.engine.core import Task @@ -121,34 +123,50 @@ def get_bindings( fld: shell.arg for fld in 
list_fields(task.definition): if TypeParser.contains_type(FileSet, fld.type): - fileset: FileSet | None = task.inputs[fld.name] - if not fileset: + value: FileSet | None = task.inputs[fld.name] + if not value: continue - if not isinstance(fileset, (os.PathLike, FileSet)): - raise NotImplementedError( - f"No support for generating bindings for {type(fileset)} types " - f"({fileset})" - ) + copy_file = fld.copy_mode == FileSet.CopyMode.copy - host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") + def map_path(fileset: os.PathLike | FileSet) -> Path: + host_path, env_path = fileset.parent, Path( + f"{root}{fileset.parent}" + ) - # Default to mounting paths as read-only, but respect existing modes - bindings[host_path] = ( - env_path, - "rw" if copy_file or isinstance(fld, shell.outarg) else "ro", - ) + # Default to mounting paths as read-only, but respect existing modes + bindings[host_path] = ( + env_path, + "rw" if copy_file or isinstance(fld, shell.outarg) else "ro", + ) + return ( + env_path / fileset.name + if isinstance(fileset, os.PathLike) + else tuple(env_path / rel for rel in fileset.relative_fspaths) + ) # Provide updated in-container paths to the command to be run. If a # fs-object, which resolves to a single path, just pass in the name of # that path relative to the location in the mount point in the container. # If it is a more complex file-set with multiple paths, then it is converted # into a tuple of paths relative to the base of the fileset. - value_updates[fld.name] = ( - env_path / fileset.name - if isinstance(fileset, os.PathLike) - else tuple(env_path / rel for rel in fileset.relative_fspaths) - ) + if TypeParser.matches(value, os.PathLike | FileSet): + value_updates[fld.name] = map_path(value) + elif TypeParser.matches(value, ty.Sequence[FileSet | os.PathLike]): + mapped_value = [] + for val in value: + mapped_val = map_path(val) + if isinstance(mapped_val, tuple): + mapped_value.extend(mapped_val) + else: + mapped_value.append(mapped_val) + value_updates[fld.name] = mapped_value + else: + logger.debug( + "No support for generating bindings for %s types " "(%s)", + type(value), + value, + ) # Add the cache directory to the list of mounts bindings[task.cache_dir] = (f"{self.root}/{task.cache_dir}", "rw") diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 7a6701e4f7..6288e903dc 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -536,7 +536,12 @@ def load_and_run(task_pkl: Path, rerun: bool = False) -> Path: resultfile = task.output_dir / "_result.pklz" try: - task.run(rerun=rerun) + if task.submitter.worker.is_async: + task.submitter.loop.run_until_complete( + task.submitter.worker.run_async(task, rerun=rerun) + ) + else: + task.submitter.worker.run(task, rerun=rerun) except Exception as e: # creating result and error files if missing errorfile = task.output_dir / "_error.pklz" diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index ae7556f792..f8fcc71e67 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -725,7 +725,7 @@ def _split_definition(self) -> dict[int, "TaskDef[OutputType]"]: resolved[inpt_name] = value._get_value( workflow=self.workflow, graph=self.graph, - state_index=input_ind[state_key], + state_index=input_ind.get(state_key), ) elif state_key in input_ind: resolved[inpt_name] = self.node.state._get_element( @@ -786,7 +786,7 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: if is_runnable: runnable.append(self.blocked.pop(index)) 
self.queued.update({t.state_index: t for t in runnable}) - return runnable + return list(self.queued.values()) async def prepare_runnable(runnable): diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index 2b84ffc957..d8d1d0bd00 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -230,7 +230,8 @@ def test_load_and_run_exception_run(tmpdir): assert result_1.outputs.out == 2 -def test_load_and_run_wf(tmpdir): +@pytest.mark.parametrize("worker", ["cf", "debug"]) +def test_load_and_run_wf(tmpdir, worker): """testing load_and_run for pickled task""" wf_pkl = Path(tmpdir.join("wf_main.pkl")) @@ -242,7 +243,7 @@ def Workflow(x, y=10): task = Task( name="mult", definition=Workflow(x=2), - submitter=Submitter(cache_dir=tmpdir, worker="cf"), + submitter=Submitter(cache_dir=tmpdir, worker=worker), ) with wf_pkl.open("wb") as fp: diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 98d8366499..0d12b6fee7 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -89,7 +89,7 @@ def test_singularity_2a(plugin, tmp_path): assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" with Submitter( - worker=plugin, environment=Singularity(image=image), cache_dir=tmp_path + worker="debug", environment=Singularity(image=image), cache_dir=tmp_path ) as sub: res = sub(singu) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index 3ae3756f16..3df84d5b69 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -97,12 +97,12 @@ def test_input_file_hash_1(tmp_path): def A(in_file: File) -> File: return in_file - assert A(in_file=outfile)._hash == "9644d3998748b339819c23ec6abec520" + assert A(in_file=outfile)._hash == "e708da65b720212c5ce9ed2c65aae59c" with open(outfile, "w") as fp: fp.write("test") - assert A(in_file=outfile)._hash == "9f7f9377ddef6d8c018f1bf8e89c208c" + assert A(in_file=outfile)._hash == "f726a193430352bb3b92dccf5eccff3a" def test_input_file_hash_2(tmp_path): @@ -117,7 +117,7 @@ def A(in_file: File) -> File: # checking specific hash value hash1 = A(in_file=file)._hash - assert hash1 == "179bd3cbdc747edc4957579376fe8c7d" + assert hash1 == "eba2fafb8df4bae94a7aa42bb159b778" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" @@ -146,7 +146,7 @@ def A(in_file: ty.Union[File, int]) -> File: # checking specific hash value hash1 = A(in_file=file)._hash - assert hash1 == "179bd3cbdc747edc4957579376fe8c7d" + assert hash1 == "eba2fafb8df4bae94a7aa42bb159b778" # checking if different name doesn't affect the hash file_diffname = tmp_path / "in_file_2.txt" @@ -234,7 +234,7 @@ def A(in_file: ty.List[ty.List[ty.Union[int, File]]]) -> File: # checking specific hash value hash1 = A(in_file=[[file, 3]])._hash - assert hash1 == "ffd7afe0ca9d4585518809a509244b4b" + assert hash1 == "b583e0fd5501d3bed9bf510ce2a9e379" # the same file, but int field changes hash1a = A(in_file=[[file, 5]])._hash @@ -268,7 +268,7 @@ def A(in_file: ty.List[ty.Dict[ty.Any, ty.Union[File, int]]]) -> File: # checking specific hash value hash1 = A(in_file=[{"file": file, "int": 3}])._hash - assert hash1 == "ba884a74e33552854271f55b03e53947" + assert hash1 == "aa2d4b708ed0dd8340582a6514bfd5ce" # the same file, but int field changes hash1a = A(in_file=[{"file": file, "int": 5}])._hash diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index 
104bd0f549..adf6b02134 100644 --- a/pydra/engine/tests/test_state.py +++ b/pydra/engine/tests/test_state.py @@ -479,6 +479,7 @@ def test_state_connect_1(): no explicit splitter for the second state """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a"] @@ -502,6 +503,7 @@ def test_state_connect_1a(): the second state has explicit splitter from the first one (the prev-state part) """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter="_NA", @@ -547,6 +549,7 @@ def test_state_connect_2(): splitter from the first node and a new field (the prev-state and current part) """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["_NA", "a"], @@ -595,6 +598,7 @@ def test_state_connect_2a(): adding an additional scalar field that is not part of the splitter """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["_NA", "a"], @@ -637,6 +641,7 @@ def test_state_connect_2b(): splitter from the first node (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.a"] @@ -644,7 +649,7 @@ def test_state_connect_2b(): assert st2.current_splitter == "NB.a" assert st2.prev_state_splitter == "_NA" - st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [1, 2]}) + st2.prepare_states(inputs={"NB.a": [1, 2]}) assert st2.group_for_inputs_final == {"NA.a": 0, "NB.a": 1} assert st2.groups_stack_final == [[0, 1]] assert st2.states_ind == [ @@ -675,7 +680,9 @@ def test_state_connect_3(): splitter from the previous states (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NB.a": [30, 50]}) st3 = State( name="NC", other_states={"NA": (st1, "b"), "NB": (st2, "c")}, @@ -720,7 +727,9 @@ def test_state_connect_3a(): the third state has explicit splitter that contains splitters from previous states """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -762,7 +771,9 @@ def test_state_connect_3b(): splitter from the second state has to be added (partial prev-state part) """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( name="NC", splitter="_NB", @@ -803,7 +814,9 @@ def test_state_connect_4(): the third state has explicit scalar(!) 
splitter that contains two previous states """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NB.a": [30, 50]}) st3 = State( name="NC", splitter=("_NA", "_NB"), @@ -834,6 +847,7 @@ def test_state_connect_5(): the second state has no explicit splitter """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "a")}) assert st2.splitter == "_NA" assert st2.splitter_rpn == ["NA.a", "NA.b", "*"] @@ -864,7 +878,9 @@ def test_state_connect_6(): the third state has explicit splitter with splitters from previous states """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -917,7 +933,9 @@ def test_state_connect_6a(): the third state has no explicit splitter """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.a": [600, 700]}) st3 = State( name="NC", other_states={"NA": (st1, "a"), "NB": (st2, "b")}, @@ -967,6 +985,7 @@ def test_state_connect_7(): no explicit splitter for the second state """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, ["x", "y"])}) # should take into account that x, y come from the same task assert st2.splitter == "_NA" @@ -993,7 +1012,9 @@ def test_state_connect_8(): and it should give the same as the previous test """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) + st2.prepare_states(inputs={"NA.a": [3, 5]}) st3 = State( name="NC", other_states={"NA": (st1, "x"), "NB": (st2, "y")}, @@ -1027,11 +1048,14 @@ def test_state_connect_9(): """ st1 = State(name="NA_1", splitter="a") + st1.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) st1a = State(name="NA_2", splitter="a") + st1a.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) st2 = State( name="NB", other_states={"NA_1": (st1, "b"), "NA_2": (st1a, "c")}, ) + st2.prepare_states(inputs={"NA_1.a": [3, 5], "NA_2.a": [11, 12]}) st3 = State( name="NC", other_states={"NA_1": (st1, "x"), "NB": (st2, "y")}, @@ -1068,6 +1092,7 @@ def test_state_connect_innerspl_1(): the second state has an inner splitter, full splitter provided """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["_NA", "b"], @@ -1124,6 +1149,9 @@ def test_state_connect_innerspl_1a(): splitter from the first state (the prev-state part) has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={"NA.a": [3, 5]}, + ) st2 = State(name="NB", splitter="b", other_states={"NA": (st1, "b")}) assert st2.splitter == ["_NA", "NB.b"] @@ -1188,6 +1216,10 @@ def test_state_connect_innerspl_2(): only the current part of the splitter provided (the prev-state has to be added) """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={"NA.a": [3, 5], "NB.b": [[1, 10, 100], [2, 20, 200]], "NB.c": [13, 17]}, + 
cont_dim={"NB.b": 2}, # will be treated as 2d container + ) st2 = State( name="NB", splitter=["c", "b"], @@ -1263,6 +1295,10 @@ def test_state_connect_innerspl_2a(): """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={"NA.a": [3, 5], "NB.b": [[1, 10, 100], [2, 20, 200]], "NB.c": [13, 17]}, + cont_dim={"NB.b": 2}, # will be treated as 2d container + ) st2 = State( name="NB", splitter=["b", "c"], @@ -1334,11 +1370,19 @@ def test_state_connect_innerspl_3(): """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["c", "b"], other_states={"NA": (st1, "b")}, ) + st2.prepare_states( + inputs={ + "NB.b": [[1, 10, 100], [2, 20, 200]], + "NB.c": [13, 17], + }, + cont_dim={"NB.b": 2}, # will be treated as 2d container + ) st3 = State(name="NC", splitter="d", other_states={"NB": (st2, "a")}) assert st3.splitter == ["_NB", "NC.d"] @@ -1477,7 +1521,19 @@ def test_state_connect_innerspl_4(): the third one connected to two previous, only the current part of splitter provided """ st1 = State(name="NA", splitter="a") + st1.prepare_states( + inputs={ + "NA.a": [3, 5], + } + ) st2 = State(name="NB", splitter=["b", "c"]) + st2.prepare_states( + inputs={ + "NA.a": [3, 5], + "NB.b": [10, 20], + "NB.c": [13, 17], + } + ) st3 = State( name="NC", splitter="d", @@ -1584,6 +1640,7 @@ def test_state_combine_1(): def test_state_connect_combine_1(): """two connected states; outer splitter and combiner in the first one""" st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] @@ -1629,6 +1686,9 @@ def test_state_connect_combine_2(): additional splitter in the second node """ st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st1.prepare_states( + inputs={"NA.a": [3, 5], "NA.b": [10, 20], "NB.c": [90, 150], "NB.d": [0, 1]} + ) st2 = State(name="NB", splitter="d", other_states={"NA": (st1, "c")}) assert st1.splitter == ["NA.a", "NA.b"] @@ -1691,6 +1751,7 @@ def test_state_connect_combine_3(): additional splitter in the second node """ st1 = State(name="NA", splitter=["a", "b"], combiner="a") + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State( name="NB", splitter="d", @@ -1761,6 +1822,7 @@ def test_state_connect_innerspl_combine_1(): """one previous node and one inner splitter (and inner splitter combiner); only current part provided - the prev-state part had to be added""" st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["c", "b"], @@ -1845,6 +1907,7 @@ def test_state_connect_innerspl_combine_2(): the prev-state part has to be added """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", splitter=["c", "b"], @@ -1924,6 +1987,7 @@ def test_state_connect_combine_prevst_1(): (i.e. from the prev-state part of the splitter), """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State( name="NB", other_states={"NA": (st1, "b")}, @@ -1958,6 +2022,7 @@ def test_state_connect_combine_prevst_2(): (i.e. 
from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State( name="NB", other_states={"NA": (st1, "b")}, @@ -1970,7 +2035,7 @@ def test_state_connect_combine_prevst_2(): assert st2.current_combiner_all == st2.current_combiner == [] assert st2.splitter_rpn_final == ["NA.b"] - st2.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st2.prepare_states(inputs={}) assert st2.group_for_inputs_final == {"NA.b": 0} assert st2.groups_stack_final == [[0]] assert st2.states_ind == [ @@ -1998,7 +2063,9 @@ def test_state_connect_combine_prevst_3(): (i.e. from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State(name="NB", other_states={"NA": (st1, "b")}) + st2.prepare_states(inputs={}) st3 = State( name="NC", other_states={"NB": (st2, "c")}, @@ -2009,7 +2076,7 @@ def test_state_connect_combine_prevst_3(): assert st3.combiner == ["NA.a"] assert st3.splitter_rpn_final == ["NA.b"] - st3.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) + st3.prepare_states(inputs={}) assert st3.group_for_inputs_final == {"NA.b": 0} assert st3.groups_stack_final == [[0]] @@ -2038,7 +2105,9 @@ def test_state_connect_combine_prevst_4(): the third state has also combiner from the prev-state part """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NB.a": [600, 700]}) st3 = State( name="NC", splitter=["_NA", "_NB"], @@ -2090,7 +2159,9 @@ def test_state_connect_combine_prevst_5(): the third state has also combiner from the prev-state part """ st1 = State(name="NA", splitter="a") + st1.prepare_states(inputs={"NA.a": [3, 5]}) st2 = State(name="NB", splitter="a") + st2.prepare_states(inputs={"NB.a": [600, 700]}) st3 = State( name="NC", splitter=("_NA", "_NB"), @@ -2124,6 +2195,7 @@ def test_state_connect_combine_prevst_6(): (i.e. 
from the prev-state part of the splitter), """ st1 = State(name="NA", splitter=["a", "b"]) + st1.prepare_states(inputs={"NA.a": [3, 5], "NA.b": [10, 20]}) st2 = State( name="NB", splitter="c", diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 6a2aa9bc16..f15691b82a 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -946,24 +946,19 @@ def test_audit_shellcommandtask(tmpdir): def test_audit_shellcommandtask_file(tmp_path): # sourcery skip: use-fstring-for-concatenation - import glob - import shutil - # create test.txt file with "This is a test" in it in the tmpdir # create txt file in cwd - with open("test.txt", "w") as f: + test1_file = tmp_path / "test.txt" + test2_file = tmp_path / "test2.txt" + with open(test1_file, "w") as f: f.write("This is a test") - with open("test2.txt", "w") as f: + with open(test2_file, "w") as f: f.write("This is a test") - # copy the test.txt file to the tmpdir - shutil.copy("test.txt", tmp_path) - shutil.copy("test2.txt", tmp_path) - cmd = "cat" - file_in = File(tmp_path / "test.txt") - file_in_2 = File(tmp_path / "test2.txt") + file_in = File(test1_file) + file_in_2 = File(test2_file) test_file_hash = hash_function(file_in) test_file_hash_2 = hash_function(file_in_2) Shelly = shell.define( diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py index 821d5162c2..60f92fbc3c 100644 --- a/pydra/utils/tests/test_typing.py +++ b/pydra/utils/tests/test_typing.py @@ -488,29 +488,25 @@ def test_type_coercion_fail2a(): def test_type_coercion_fail3(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): TypeParser(ty.Sequence, coercible=[(ty.Sequence, ty.Sequence)])( {"a": 1, "b": 2} ) - assert exc_info_matches(exc_info, "doesn't match any of the explicit inclusion") def test_type_coercion_fail4(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match=r"Cannot coerce \{'a': 1\} into"): TypeParser(ty.Sequence, coercible=[(ty.Any, ty.Any)])({"a": 1}) - assert exc_info_matches(exc_info, "Cannot coerce {'a': 1} into") def test_type_coercion_fail5(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="as 1 is not iterable"): TypeParser(ty.List[int], coercible=[(ty.Any, ty.Any)])(1) - assert exc_info_matches(exc_info, "as 1 is not iterable") def test_type_coercion_fail6(): - with pytest.raises(TypeError) as exc_info: + with pytest.raises(TypeError, match="is not a mapping type"): TypeParser(ty.List[ty.Dict[str, str]], coercible=[(ty.Any, ty.Any)])((1, 2, 3)) - assert exc_info_matches(exc_info, "is not a mapping type") def test_type_coercion_realistic(): diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py index c95ad5f6cd..031b131845 100644 --- a/pydra/utils/typing.py +++ b/pydra/utils/typing.py @@ -269,7 +269,6 @@ def __call__(self, obj: ty.Any) -> T: f"Mandatory field{self.label_str} of type {self.tp} was not " "provided a value (i.e. 
a value that wasn't None) " ) from None - self.coerce(obj) raise TypeError( f"Incorrect type for field{self.label_str}: {obj!r} is not of type " f"{self.tp} (and cannot be coerced to it)" From 4e331e5cbc409741fbdba4098f86aa550fc16a0c Mon Sep 17 00:00:00 2001 From: Tom Close Date: Thu, 13 Mar 2025 12:02:13 +1100 Subject: [PATCH 342/342] debugging states and workflows --- pydra/conftest.py | 4 +- pydra/design/tests/test_python.py | 4 +- pydra/engine/audit.py | 2 +- pydra/engine/core.py | 5 +- pydra/engine/graph.py | 4 +- pydra/engine/helpers.py | 19 +- pydra/engine/lazy.py | 36 +- pydra/engine/node.py | 6 - pydra/engine/submitter.py | 64 +- pydra/engine/tests/test_workflow.py | 3243 ++++++++++++--------------- 10 files changed, 1492 insertions(+), 1895 deletions(-) diff --git a/pydra/conftest.py b/pydra/conftest.py index 66a1d200fc..3deb3df341 100644 --- a/pydra/conftest.py +++ b/pydra/conftest.py @@ -20,7 +20,7 @@ def pytest_generate_tests(metafunc): if bool(shutil.which("sbatch")): Plugins = ["slurm"] else: - Plugins = ["cf"] + Plugins = ["debug"] # ["debug", "cf"] try: if metafunc.config.getoption("dask"): Plugins.append("dask") @@ -50,7 +50,7 @@ def pytest_generate_tests(metafunc): elif bool(shutil.which("sbatch")): Plugins = ["slurm"] else: - Plugins = ["cf"] + Plugins = ["debug"] # ["debug", "cf"] try: if metafunc.config.getoption("psij"): Plugins.append("psij-" + metafunc.config.getoption("psij")) diff --git a/pydra/design/tests/test_python.py b/pydra/design/tests/test_python.py index e698c79495..341183c308 100644 --- a/pydra/design/tests/test_python.py +++ b/pydra/design/tests/test_python.py @@ -11,7 +11,7 @@ sort_key = attrgetter("name") -def test_interface_wrap_function(): +def test_interface_wrap_function(tmp_path): def func(a: int) -> float: """Sample function with inputs and outputs""" return a * 2 @@ -27,7 +27,7 @@ def func(a: int) -> float: ] assert outputs == [python.out(name="out", type=float)] definition = SampleDef(a=1) - outputs = definition() + outputs = definition(cache_dir=tmp_path) assert outputs.out == 2.0 with pytest.raises(TypeError): SampleDef(a=1.5) diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index 55b357b782..1622565dab 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -30,7 +30,7 @@ def __init__(self, audit_flags, messengers, messenger_args, develop=None): Base configuration of auditing. messengers : :obj:`pydra.util.messenger.Messenger` or list of :class:`pydra.util.messenger.Messenger`, optional - Defify types of messenger used by Audit to send a message. + Taskify types of messenger used by Audit to send a message. Could be `PrintMessenger`, `FileMessenger`, or `RemoteRESTMessenger`. messenger_args : :obj:`dict`, optional Optional arguments for the `Messenger.send` method. 
diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 89bf5af575..56752f071f 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -792,9 +792,8 @@ def execution_graph(self, submitter: "Submitter") -> DiGraph: node.graph = graph return graph - @property - def graph(self) -> DiGraph: - return self._create_graph(self.nodes, detailed=True) + def graph(self, detailed: bool = False) -> DiGraph: + return self._create_graph(self.nodes, detailed=detailed) def _create_graph( self, nodes: "list[Node | NodeExecution]", detailed: bool = False diff --git a/pydra/engine/graph.py b/pydra/engine/graph.py index 4a3beca3aa..447605955f 100644 --- a/pydra/engine/graph.py +++ b/pydra/engine/graph.py @@ -81,8 +81,8 @@ def nodes(self) -> list[NodeType]: def nodes(self, nodes: ty.Iterable[NodeType]) -> None: if nodes: nodes = ensure_list(nodes) - if len(set(nodes)) != len(nodes): - raise Exception("nodes have repeated elements") + # if len(set(nodes)) != len(nodes): + # raise Exception("nodes have repeated elements") self._nodes = nodes def node(self, name: str) -> NodeType: diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index 6288e903dc..6e94089499 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -47,26 +47,17 @@ def plot_workflow( # Construct the workflow object wf = Workflow.construct(workflow_task) - graph = wf.graph + if not name: - name = f"graph_{wf._node.name}" + name = f"graph_{type(workflow_task).__name__}" if type == "simple": - for task in graph.nodes: - wf.create_connections(task) + graph = wf.graph() dotfile = graph.create_dotfile_simple(outdir=out_dir, name=name) elif type == "nested": - for task in graph.nodes: - wf.create_connections(task) + graph = wf.graph() dotfile = graph.create_dotfile_nested(outdir=out_dir, name=name) elif type == "detailed": - # create connections with detailed=True - for task in graph.nodes: - wf.create_connections(task, detailed=True) - # adding wf outputs - for wf_out, lf in wf._connections: - graph.add_edges_description( - (wf._node.name, wf_out, lf._node.name, lf.field) - ) + graph = wf.graph(detailed=True) dotfile = graph.create_dotfile_detailed(outdir=out_dir, name=name) else: raise Exception( diff --git a/pydra/engine/lazy.py b/pydra/engine/lazy.py index 1797b401d9..06c909fd9f 100644 --- a/pydra/engine/lazy.py +++ b/pydra/engine/lazy.py @@ -151,7 +151,7 @@ def _get_value( value : Any the resolved value of the lazy-field """ - + state = self._node.state jobs = graph.node(self._node.name).get_jobs(state_index) def retrieve_from_job(job: "Task[DefType]") -> ty.Any: @@ -184,25 +184,27 @@ def retrieve_from_job(job: "Task[DefType]") -> ty.Any: val = self._apply_cast(val) return val - if not isinstance(jobs, StateArray): + if not isinstance(jobs, StateArray): # single job return retrieve_from_job(jobs) - elif not self._node.state or not self._node.state.depth(before_combine=True): + elif not state or not state.depth(before_combine=True): assert len(jobs) == 1 return retrieve_from_job(jobs[0]) - elif not self._node.state.keys_final: # all states are combined over - return [retrieve_from_job(j) for j in jobs] - elif self._node.state.combiner: - sorted_values = { - frozenset(i.items()): [] for i in self._node.state.states_ind_final - } - assert len(jobs) == len(self._node.state.inputs_ind) - for ind, job in zip(self._node.state.inputs_ind, jobs): - sorted_values[ - frozenset((key, ind[key]) for key in self._node.state.keys_final) - ].append(retrieve_from_job(job)) - return StateArray(sorted_values.values()) - else: - 
return StateArray(retrieve_from_job(j) for j in jobs) + # elif state.combiner and state.keys_final: + # # We initialise it here rather than using a defaultdict to ensure the order + # # of the keys matches how it is defined in the state so we can return the + # # values in the correct order + # sorted_values = {frozenset(i.items()): [] for i in state.states_ind_final} + # # Iterate through the jobs and append the values to the correct final state + # # key + # for job in jobs: + # state_key = frozenset( + # (key, state.states_ind[job.state_index][key]) + # for key in state.keys_final + # ) + # sorted_values[state_key].append(retrieve_from_job(job)) + # return StateArray(sorted_values.values()) + # else: + return [retrieve_from_job(j) for j in jobs] @property def _source(self): diff --git a/pydra/engine/node.py b/pydra/engine/node.py index a5061fb089..906d29847e 100644 --- a/pydra/engine/node.py +++ b/pydra/engine/node.py @@ -45,12 +45,6 @@ class Node(ty.Generic[OutputType]): init=False, default=None, eq=False, hash=False, repr=False ) _state: State | None = attrs.field(init=False, default=NOT_SET) - # _cont_dim: dict[str, int] | None = attrs.field( - # init=False, default=None - # ) # QUESTION: should this be included in the state? - # _inner_cont_dim: dict[str, int] = attrs.field( - # init=False, factory=dict - # ) # QUESTION: should this be included in the state? def __attrs_post_init__(self): self._set_state() diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index f8fcc71e67..b58fd03a4c 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -569,39 +569,39 @@ def tasks(self) -> ty.Generator["Task[DefType]", None, None]: self._tasks = {t.state_index: t for t in self._generate_tasks()} return self._tasks.values() - def get_jobs( - self, index: int | None = None, as_array: bool = False - ) -> "Task | StateArray[Task]": + def get_jobs(self, final_index: int | None = None) -> "Task | StateArray[Task]": """Get the jobs that match a given state index. Parameters ---------- - index : int, optional - The index of the state of the task to get, by default None - as_array : bool, optional - Whether to return the tasks in a state-array object, by default if the index - matches + final_index : int, optional + The index of the output state array (i.e. 
after any combinations) of the + job to get, by default None Returns ------- matching : Task | StateArray[Task] The task or tasks that match the given index """ - matching = StateArray() - if self.tasks: - try: - task = self._tasks[index] - except KeyError: - if index is None: - return StateArray(self._tasks.values()) - # Select matching tasks and return them in nested state-array objects - for ind, task in self._tasks.items(): - matching.append(task) - else: - if not as_array: - return task - matching.append(task) - return matching + if not self.tasks: # No jobs, return empty state array + return StateArray() + if not self.node.state: # Return the singular job + assert final_index is None + task = self._tasks[None] + return task + if final_index is None: # return all jobs in a state array + return StateArray(self._tasks.values()) + if not self.node.state.combiner: # Select the job that matches the index + task = self._tasks[final_index] + return task + # Get a slice of the tasks that match the given index of the state array of the + # combined values + final_index = set(self.node.state.states_ind_final[final_index].items()) + return StateArray( + self._tasks[i] + for i, ind in enumerate(self.node.state.states_ind) + if set(ind.items()).issuperset(final_index) + ) @property def started(self) -> bool: @@ -762,9 +762,23 @@ def get_runnable_tasks(self, graph: DiGraph) -> list["Task[DefType]"]: for index, task in list(self.blocked.items()): pred: NodeExecution is_runnable = True + states_ind = ( + list(self.node.state.states_ind[index].items()) + if self.node.state + else [] + ) for pred in graph.predecessors[self.node.name]: - pred_jobs: StateArray[Task] = pred.get_jobs(index, as_array=True) - pred_inds = [j.state_index for j in pred_jobs] + if pred.node.state: + pred_states_ind = { + (k, i) for k, i in states_ind if k.startswith(pred.name + ".") + } + pred_inds = [ + i + for i, ind in enumerate(pred.node.state.states_ind) + if set(ind.items()).issuperset(pred_states_ind) + ] + else: + pred_inds = [None] if not all(i in pred.successful for i in pred_inds): is_runnable = False blocked = True diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index 1ce9364199..eb4c8e5025 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -38,748 +38,645 @@ from pydra.engine.submitter import Submitter from pydra.design import python, workflow import pydra.engine.core +from pydra.engine.core import Workflow +from pydra.engine.helpers import plot_workflow from pydra.utils import exc_info_matches -def test_wf_no_output(plugin, tmpdir): +def test_wf_no_output(plugin, tmp_path): """Raise error when output isn't set with set_output""" @workflow.define - def Workflow(x): + def Worky(x): workflow.add(Add2(x=x)) - wf = Workflow(x=2) + worky = Worky(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) - assert "Workflow output cannot be None" in str(excinfo.value) + worky(worker=plugin, cache_dir=tmp_path) + assert "Worky output cannot be None" in str(excinfo.value) -def test_wf_1(plugin, tmpdir): +def test_wf_1(plugin, tmp_path): """workflow with one task and no splitter""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=2) - - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert 
not results.errored, "\n".join(results.errors["error message"]) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf._checksum == checksum_before + Workflow.construct(worky) + assert worky._hash == checksum_before - assert 4 == results.outputs.out + assert 4 == outputs.out -def test_wf_1a_outpastuple(plugin, tmpdir): +def test_wf_1a_outpastuple(plugin, tmp_path): """workflow with one task and no splitter set_output takes a tuple """ @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) - return add2.out - - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert 4 == results.outputs.out - - -def test_wf_1_call_subm(plugin, tmpdir): - """using wf.__call_ with submitter""" - - @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 4 == results.outputs.out + assert 4 == outputs.out -def test_wf_1_call_plug(plugin, tmpdir): - """using wf.__call_ with plugin""" +def test_wf_1_call_subm(plugin, tmp_path): + """using wf["__call_"] with submitter""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=2) + worky = Worky(x=2) - outputs = wf(plugin=plugin) + outputs = worky(worker=plugin, cache_dir=tmp_path) assert 4 == outputs.out -def test_wf_1_call_noplug_nosubm(plugin, tmpdir): - """using wf.__call_ without plugin or submitter""" +def test_wf_1_call_plug(plugin, tmp_path): + """using wf["__call_"] with plugin""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=2) + worky = Worky(x=2) - outputs = wf() + outputs = worky(plugin=plugin) assert 4 == outputs.out -def test_wf_1_call_exception(plugin, tmpdir): - """using wf.__call_ with plugin and submitter - should raise an exception""" +def test_wf_1_call_noplug_nosubm(plugin, tmp_path): + """using wf["__call_"] without plugin or submitter""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - with pytest.raises(Exception) as e: - wf(submitter=sub, plugin=plugin) - assert "Defify submitter OR plugin" in str(e.value) - - -def test_wf_1_inp_in_call(tmpdir): - """Defining input in __call__""" + worky = Worky(x=2) - @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) - return add2.out + outputs = worky() - wf = Workflow(x=1) - results = wf(x=2) - assert 4 == results.outputs.out + assert 4 == outputs.out -def test_wf_1_upd_in_run(tmpdir): +def test_wf_1_upd_in_run(tmp_path, plugin): """Updating input in __call__""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow(x=1) - results = wf(x=2) - assert 4 == results.outputs.out + worky = Worky(x=1) + worky.x = 2 + outputs = worky(cache_dir=tmp_path, 
plugin=plugin) + assert 4 == outputs.out -def test_wf_2(plugin, tmpdir): +def test_wf_2(plugin, tmp_path): """workflow with 2 tasks, no splitter""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 8 == results.outputs.out + assert 8 == outputs.out -def test_wf_2a(plugin, tmpdir): +def test_wf_2a(plugin, tmp_path): """workflow with 2 tasks, no splitter creating add2_task first (before calling add method), """ @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=2, y=3) + worky = Worky(x=2, y=3) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == outputs.out - assert 8 == results.outputs.out - -def test_wf_2b(plugin, tmpdir): +def test_wf_2b(plugin, tmp_path): """workflow with 2 tasks, no splitter creating add2_task first (before calling add method), adding inputs.x after add method """ @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=2, y=3) + worky = Worky(x=2, y=3) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert 8 == outputs.out - assert 8 == results.outputs.out - -def test_wf_2c_multoutp(plugin, tmpdir): +def test_wf_2c_multoutp(plugin, tmp_path): """workflow with 2 tasks, no splitter setting multiple outputs for the workflow """ @workflow.define(outputs=["out_add2", "out_mult"]) - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out, mult.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking outputs from both nodes - assert 6 == results.outputs.out_mult - assert 8 == results.outputs.out_add2 + assert 6 == outputs.out_mult + assert 8 == outputs.out_add2 -def test_wf_2d_outpasdict(plugin, tmpdir): +def test_wf_2d_outpasdict(plugin, tmp_path): """workflow with 2 tasks, no splitter setting multiple outputs using a dictionary """ @workflow.define(outputs=["out_add2", "out_mult"]) - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, 
y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out, mult.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking outputs from both nodes - assert 6 == results.outputs.out_mult - assert 8 == results.outputs.out_add2 + assert 6 == outputs.out_mult + assert 8 == outputs.out_add2 @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3(plugin_dask_opt, tmpdir): +def test_wf_3(plugin_dask_opt, tmp_path): """testing None value for an input""" @workflow.define - def Workflow(x, y): + def Worky(x, y): addvar = workflow.add(FunAddVarNone(a=x, b=y)) - add2 = workflow.add(Add2(x=addvar.out)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") return add2.out - wf = Workflow(x=2, y=None) + worky = Worky(x=2, y=None) - with Submitter(worker=plugin_dask_opt) as sub: - results = sub(wf) + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - - assert 4 == results.outputs.out + assert 4 == outputs.out @pytest.mark.xfail(reason="the task error doesn't propagate") -def test_wf_3a_exception(plugin, tmpdir): - """testinh wf without set input, attr.NOTHING should be set +def test_wf_3a_exception(plugin, tmp_path): + """testinh worky without set input, attr.NOTHING should be set and the function should raise an exception """ @workflow.define - def Workflow(x, y): + def Worky(x, y): addvar = workflow.add(FunAddVarNone(a=x, b=y)) - add2 = workflow.add(Add2(x=addvar.out)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") return add2.out - wf = Workflow(x=2, y=attr.NOTHING) + worky = Worky(x=2, y=attr.NOTHING) - with pytest.raises(TypeError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) - assert "unsupported" in str(excinfo.value) + with pytest.raises(TypeError, match="unsupported"): + worky(worker=plugin, cache_dir=tmp_path) -def test_wf_4(plugin, tmpdir): - """wf with a task that doesn't set one input and use the function default value""" +def test_wf_4(plugin, tmp_path): + """worky with a task that doesn't set one input and use the function default value""" @workflow.define - def Workflow(x, y): + def Worky(x, y=None): addvar = workflow.add(FunAddVarDefault(a=x)) - add2 = workflow.add(Add2(x=addvar.out)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") return add2.out - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 5 == results.outputs.out + assert 5 == outputs.out -def test_wf_4a(plugin, tmpdir): - """wf with a task that doesn't set one input, +def test_wf_4a(plugin, tmp_path): + """worky with a task that doesn't set one input, the unset input is send to the task input, so the task should use the function default value """ @workflow.define - def Workflow(x, y): - addvar = workflow.add(FunAddVarDefault(a=x, y=y)) - add2 = workflow.add(Add2(x=addvar.out)) + def Worky(x): + addvar = workflow.add(FunAddVarDefault(a=x)) + add2 = workflow.add(Add2(x=addvar.out), name="add2") return add2.out - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert not 
results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 5 == results.outputs.out + assert 5 == outputs.out -def test_wf_5(plugin, tmpdir): - """wf with two outputs connected to the task outputs +def test_wf_5(plugin, tmp_path): + """worky with two outputs connected to the task outputs one set_output """ @workflow.define(outputs=["out_sum", "out_sub"]) - def Workflow(x, y): + def Worky(x, y): addsub = workflow.add(FunAddSubVar(a=x, b=y)) return addsub.sum, addsub.sub - wf = Workflow(x=3, y=2) + worky = Worky(x=3, y=2) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert 5 == outputs.out_sum + assert 1 == outputs.out_sub - assert 5 == results.outputs.out_sum - assert 1 == results.outputs.out_sub - -def test_wf_5a(plugin, tmpdir): - """wf with two outputs connected to the task outputs, +def test_wf_5a(plugin, tmp_path): + """worky with two outputs connected to the task outputs, set_output set twice """ - @workflow.define - def Workflow(x, y): + @workflow.define(outputs=["out_sum", "out_sub"]) + def Worky(x, y): addsub = workflow.add(FunAddSubVar(a=x, b=y)) - return addsub.sum # out_sum - return addsub.sub # out_sub - - wf = Workflow(x=3, y=2) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + return addsub.sum, addsub.sub - assert not results.errored, "\n".join(results.errors["error message"]) + worky = Worky(x=3, y=2) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 5 == results.outputs.out_sum - assert 1 == results.outputs.out_sub + assert 5 == outputs.out_sum + assert 1 == outputs.out_sub -def test_wf_6(plugin, tmpdir): - """wf with two tasks and two outputs connected to both tasks, +def test_wf_6(plugin, tmp_path): + """worky with two tasks and two outputs connected to both tasks, one set_output """ @workflow.define(outputs=["out1", "out2"]) - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return mult.out, add2.out # - wf = Workflow(x=2, y=3) + worky = Worky(x=2, y=3) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert 6 == outputs.out1 + assert 8 == outputs.out2 - assert 6 == results.outputs.out1 - assert 8 == results.outputs.out2 - -def test_wf_6a(plugin, tmpdir): - """wf with two tasks and two outputs connected to both tasks, +def test_wf_6a(plugin, tmp_path): + """worky with two tasks and two outputs connected to both tasks, set_output used twice """ @workflow.define(outputs=["out1", "out2"]) - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return mult.out, add2.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 6 == results.outputs.out1 - assert 8 == results.outputs.out2 + assert 6 == 
outputs.out1 + assert 8 == outputs.out2 -def test_wf_st_1(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" +def test_wf_st_1(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - checksum_before = wf._checksum - with Submitter(cache_dir=tmpdir) as sub: - results = sub(wf) + checksum_before = worky._hash + outputs = worky(cache_dir=tmp_path, plugin=plugin) - assert not results.errored, "\n".join(results.errors["error message"]) - - assert wf._checksum == checksum_before + Workflow.construct(worky) + assert worky._hash == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out[0] == 3 - assert results.outputs.out[1] == 4 + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_subm(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" +def test_wf_st_1_call_subm(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out[0] == 3 - assert results.outputs.out[1] == 4 + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_call_plug(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__(plugin) +def test_wf_st_1_call_plug(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__(plugin) """ @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - outputs = wf(plugin=plugin) + outputs = worky(plugin=plugin) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert outputs.out[0] == 3 assert outputs.out[1] == 4 -def test_wf_st_1_call_selfplug(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__() and using self.plugin +def test_wf_st_1_call_selfplug(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__() and using self.plugin """ @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - outputs = wf() + outputs = worky() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert outputs.out[0] == 3 assert outputs.out[1] == 4 -def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow - using Workflow.__call__() without plugin and submitter +def test_wf_st_1_call_noplug_nosubm(plugin, tmp_path): + """Worky with one task, a splitter for the workflow + using Worky.__call__() without plugin and submitter (a submitter 
should be created within the __call__ function) """ @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - outputs = wf() + outputs = worky() # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] assert outputs.out[0] == 3 assert outputs.out[1] == 4 -def test_wf_st_1_inp_in_call(tmpdir): +def test_wf_st_1_inp_in_call(tmp_path, plugin): """Defining input in __call__""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow().split("x", x=[1, 2]) - results = wf() - assert results.outputs.out[0] == 3 - assert results.outputs.out[1] == 4 + worky = Worky().split("x", x=[1, 2]) + outputs = worky(cache_dir=tmp_path, plugin=plugin) # + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_1_upd_inp_call(tmpdir): +def test_wf_st_1_upd_inp_call(tmp_path, plugin): """Updating input in __call___""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow().split("x", x=[11, 22]) - results = wf(x=[1, 2]) - assert results.outputs.out[0] == 3 - assert results.outputs.out[1] == 4 + worky = Worky().split("x", x=[11, 22]) + outputs = worky(cache_dir=tmp_path, plugin=plugin) # x=[1, 2] + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_st_noinput_1(plugin, tmpdir): - """Workflow with one task, a splitter for the workflow""" +def test_wf_st_noinput_1(plugin, tmp_path): + """Worky with one task, a splitter for the workflow""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x).split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x).split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[]) + worky = Worky(x=[]) - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + wf = Workflow.construct(worky) + assert worky._hash == checksum_before - assert wf._checksum == checksum_before + assert outputs.out == [] - assert results == [] - -def test_wf_ndst_1(plugin, tmpdir): +def test_wf_ndst_1(plugin, tmp_path): """workflow with one task, a splitter on the task level""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2().split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) - - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2]) - assert not results.errored, "\n".join(results.errors["error message"]) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf._checksum == checksum_before + wf = Workflow.construct(worky) + assert worky._hash == checksum_before # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out == [3, 4] + assert outputs.out == [3, 4] -def test_wf_ndst_updatespl_1(plugin, tmpdir): +def test_wf_ndst_updatespl_1(plugin, tmp_path): """workflow with one task, a splitter on the task level is added *after* calling add """ @workflow.define - def Workflow(x): - add2 = 
workflow.add(Add2(name="add2").split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out == [3, 4] + assert outputs.out == [3, 4] -def test_wf_ndst_updatespl_1a(plugin, tmpdir): +def test_wf_ndst_updatespl_1a(plugin, tmp_path): """workflow with one task (initialize before calling add), a splitter on the task level is added *after* calling add """ @workflow.define - def Workflow(x): - add2 = workflow.add(Add2().split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out == [3, 4] + assert outputs.out == [3, 4] -def test_wf_ndst_updateinp_1(plugin, tmpdir): +def test_wf_ndst_updateinp_1(plugin, tmp_path): """workflow with one task, a splitter on the task level, updating input of the task after calling add """ @workflow.define - def Workflow(x, y): - add2 = workflow.add(Add2(x=x).split("x", x=y)) + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=y), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [13, 14] + assert outputs.out == [13, 14] -def test_wf_ndst_noinput_1(plugin, tmpdir): +def test_wf_ndst_noinput_1(plugin, tmp_path): """workflow with one task, a splitter on the task level""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2().split("x", x=x)) + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out - wf = Workflow(x=[]) + worky = Worky(x=[]) - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + wf = Workflow.construct(worky) + assert worky._hash == checksum_before - assert wf._checksum == checksum_before + assert outputs.out == [] - assert results.outputs.out == [] - -def test_wf_st_2(plugin, tmpdir): +def test_wf_st_2(plugin, tmp_path): """workflow with one task, splitters and combiner for workflow""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2(x=x)) + def Worky(x): + add2 = workflow.add(Add2(x=x), name="add2") return add2.out - wf = Workflow().split("x", x=[1, 2]).combine(combiner="x") - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split("x", x=[1, 2]).combine(combiner="x") - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out[0] == 
3 - assert results.outputs.out[1] == 4 + assert outputs.out[0] == 3 + assert outputs.out[1] == 4 -def test_wf_ndst_2(plugin, tmpdir): +def test_wf_ndst_2(plugin, tmp_path): """workflow with one task, splitters and combiner on the task level""" @workflow.define - def Workflow(x): - add2 = workflow.add(Add2().split("x", x=x).combine(combiner="x")) + def Worky(x): + add2 = workflow.add(Add2().split("x", x=x).combine(combiner="x"), name="add2") return add2.out - wf = Workflow(x=[1, 2]) + worky = Worky(x=[1, 2]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1}, 3), ({"test7.x": 2}, 4)] - assert results.outputs.out == [3, 4] + assert outputs.out == [3, 4] # workflows with structures A -> B -def test_wf_st_3(plugin, tmpdir): - """workflow with 2 tasks, splitter on wf level""" +def test_wf_st_3(plugin, tmp_path): + """workflow with 2 tasks, splitter on worky level""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow().split(("x", "y"), x=[1, 2], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split(("x", "y"), x=[1, 2], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) expected = [ ({"wfst_3.x": 1, "wfst_3.y": 11}, 13), @@ -791,275 +688,229 @@ def Workflow(x, y): ] for i, res in enumerate(expected): - assert results[i].output.out == res[1] - - # checking the return_inputs option, either return_inputs is True or "val", - # it should give values of inputs that corresponds to the specific element - results_verb = wf.result(return_inputs=True) - results_verb_val = wf.result(return_inputs="val") - for i, res in enumerate(expected): - assert (results_verb[i][0], results_verb[i][1].output.out) == res - assert (results_verb_val[i][0], results_verb_val[i][1].output.out) == res + assert outputs.out[i] == res[1] - # checking the return_inputs option return_inputs="ind" - # it should give indices of inputs (instead of values) for each element - results_verb_ind = wf.result(return_inputs="ind") - for i, res in enumerate(expected_ind): - assert (results_verb_ind[i][0], results_verb_ind[i][1].output.out) == res - -def test_wf_ndst_3(plugin, tmpdir): +def test_wf_ndst_3(plugin, tmp_path): """Test workflow with 2 tasks, splitter on a task level""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26)] - assert results.outputs.out == [13, 26] + assert outputs.out == [13, 26] -def test_wf_st_4(plugin, tmpdir): +def test_wf_st_4(plugin, tmp_path): 
"""workflow with two tasks, scalar splitter and combiner for the workflow""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow().split(("x", "y"), x=[1, 2], y=[11, 12]).combine("x") - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + worky = Worky().split(("x", "y"), x=[1, 2], y=[11, 12]).combine("x") + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results.outputs.out[0] == 13 - assert results.outputs.out[1] == 26 + assert outputs.out[0] == 13 + assert outputs.out[1] == 26 -def test_wf_ndst_4(plugin, tmpdir): +def test_wf_ndst_4(plugin, tmp_path): """workflow with two tasks, scalar splitter and combiner on tasks level""" @workflow.define - def Workflow(a, b): - mult = workflow.add(Multiply().split(("x", "y"), x=a, y=b)) - add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + def Worky(a, b): + mult = workflow.add(Multiply().split(("x", "y"), x=a, y=b), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") return add2.out - wf = Workflow(a=[1, 2], b=[11, 12]) + worky = Worky(a=[1, 2], b=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # expected: [ # ({"test7.x": 1, "test7.y": 11}, 13), ({"test7.x": 2, "test.y": 12}, 26) # ] - assert results.outputs.out == [13, 26] + assert outputs.out == [13, 26] -def test_wf_st_5(plugin, tmpdir): +def test_wf_st_5(plugin, tmp_path): """workflow with two tasks, outer splitter and no combiner""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=x, y=y)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out).split(["x", "y"], x=x, y=y), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out[0] == 13 - assert results.outputs.out[1] == 14 - assert results.outputs.out[2] == 24 - assert results.outputs.out[3] == 26 + assert outputs.out[0] == 13 + assert outputs.out[1] == 14 + assert outputs.out[2] == 24 + assert outputs.out[3] == 26 -def test_wf_ndst_5(plugin, tmpdir): +def test_wf_ndst_5(plugin, tmp_path): """workflow with two tasks, outer splitter on tasks level and no combiner""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=[11, 12]) + worky = Worky(x=[1, 2], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not 
results.errored, "\n".join(results.errors["error message"]) + assert outputs.out[0] == 13 + assert outputs.out[1] == 14 + assert outputs.out[2] == 24 + assert outputs.out[3] == 26 - assert results.outputs.out[0] == 13 - assert results.outputs.out[1] == 14 - assert results.outputs.out[2] == 24 - assert results.outputs.out[3] == 26 - -def test_wf_st_6(plugin, tmpdir): +def test_wf_st_6(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner for the workflow""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out[0][0] == 13 - assert results.outputs.out[0][1] == 24 - assert results.outputs.out[0][2] == 35 - assert results.outputs.out[1][0] == 14 - assert results.outputs.out[1][1] == 26 - assert results.outputs.out[1][2] == 38 + assert outputs.out[0][0] == 13 + assert outputs.out[0][1] == 24 + assert outputs.out[0][2] == 35 + assert outputs.out[1][0] == 14 + assert outputs.out[1][1] == 26 + assert outputs.out[1][2] == 38 -def test_wf_ndst_6(plugin, tmpdir): +def test_wf_ndst_6(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner on tasks level""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") return add2.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [[13, 24, 35], [14, 26, 38]] + assert outputs.out == [[13, 24, 35], [14, 26, 38]] -def test_wf_ndst_7(plugin, tmpdir): +def test_wf_ndst_7(plugin, tmp_path): """workflow with two tasks, outer splitter and (full) combiner for first node only""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split("x", x=x, y=y).combine("x")) + def Worky(x, y): + mult = workflow.add(Multiply(y=y).split(x=x).combine("x"), name="mult") iden = workflow.add(Identity(x=mult.out)) return iden.out - wf = Workflow(x=[1, 2, 3], y=11) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=11) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [11, 22, 33] + assert outputs.out == [11, 22, 33] -def test_wf_ndst_8(plugin, tmpdir): +def test_wf_ndst_8(plugin, tmp_path): """workflow with two tasks, outer splitter and (partial) combiner for first task only""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) + def Worky(x, y): + mult = workflow.add( + 
Multiply().split(["x", "y"], x=x, y=y).combine("x"), name="mult" + ) iden = workflow.add(Identity(x=mult.out)) return iden.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out == [[11, 22, 33], [12, 24, 36]] - assert results.outputs.out[0] == [11, 22, 33] - assert results.outputs.out[1] == [12, 24, 36] - -def test_wf_ndst_9(plugin, tmpdir): +def test_wf_ndst_9(plugin, tmp_path): """workflow with two tasks, outer splitter and (full) combiner for first task only""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine(["x", "y"])) + def Worky(x, y): + mult = workflow.add( + Multiply().split(["x", "y"], x=x, y=y).combine(["x", "y"]), name="mult" + ) iden = workflow.add(Identity(x=mult.out)) return iden.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out == [11, 12, 22, 24, 33, 36] + assert outputs.out == [11, 12, 22, 24, 33, 36] # workflows with structures A -> B -> C -def test_wf_3sernd_ndst_1(plugin, tmpdir): +def test_wf_3sernd_ndst_1(plugin, tmp_path): """workflow with three "serial" tasks, checking if the splitter is propagating""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) + def Worky(x, y): + mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y), name="mult") add2_1st = workflow.add(Add2(x=mult.out), name="add2_1st") add2_2nd = workflow.add(Add2(x=add2_1st.out), name="add2_2nd") return add2_2nd.out - wf = Workflow(x=[1, 2], y=[11, 12]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # splitter from the first task should propagate to all tasks, # splitter_rpn should be the same in all tasks - assert wf.mult.state.splitter == ["mult.x", "mult.y"] - assert wf.add2_1st.state.splitter == "_mult" - assert wf.add2_2nd.state.splitter == "_add2_1st" + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["mult.x", "mult.y"] + assert wf["add2_1st"].state.splitter == "_mult" + assert wf["add2_2nd"].state.splitter == "_add2_1st" assert ( ["mult.x", "mult.y", "*"] - == wf.mult.state.splitter_rpn - == wf.add2_1st.state.splitter_rpn - == wf.add2_2nd.state.splitter_rpn + == wf["mult"].state.splitter_rpn + == wf["add2_1st"].state.splitter_rpn + == wf["add2_2nd"].state.splitter_rpn ) - assert results.outputs.out[0] == 15 - assert results.outputs.out[1] == 16 - assert results.outputs.out[2] == 26 - assert results.outputs.out[3] == 28 + assert outputs.out == [15, 16, 26, 28] -def test_wf_3sernd_ndst_1a(plugin, tmpdir): +def test_wf_3sernd_ndst_1a(plugin, tmp_path): """ workflow with three "serial" tasks, checking if the splitter is propagating first task has a splitter that propagates to the 2nd task, @@ -1067,230 +918,204 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2_1st = 
workflow.add(Add2().split("x", x=x), name="add2_1st") - mult = workflow.add(Multiply(x=add2_1st.out).split("y", y=y)) + mult = workflow.add(Multiply(x=add2_1st.out).split("y", y=y), name="mult") add2_2nd = workflow.add(Add2(x=mult.out), name="add2_2nd") return add2_2nd.out - wf = Workflow(x=[1, 2], y=[11, 12]) + worky = Worky(x=[1, 2], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # splitter from the 1st task should propagate and the 2nd task should add one more # splitter_rpn for the 2nd and the 3rd task should be the same - assert wf.add2_1st.state.splitter == "add2_1st.x" - assert wf.mult.state.splitter == ["_add2_1st", "mult.y"] - assert wf.add2_2nd.state.splitter == "_mult" + wf = Workflow.construct(worky) + assert wf["add2_1st"].state.splitter == "add2_1st.x" + assert wf["mult"].state.splitter == ["_add2_1st", "mult.y"] + assert wf["add2_2nd"].state.splitter == "_mult" assert ( ["add2_1st.x", "mult.y", "*"] - == wf.mult.state.splitter_rpn - == wf.add2_2nd.state.splitter_rpn + == wf["mult"].state.splitter_rpn + == wf["add2_2nd"].state.splitter_rpn ) - assert results.outputs.out[0] == 35 - assert results.outputs.out[1] == 38 - assert results.outputs.out[2] == 46 - assert results.outputs.out[3] == 50 + assert outputs.out == [35, 38, 46, 50] # workflows with structures A -> C, B -> C @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3nd_st_1(plugin_dask_opt, tmpdir): +def test_wf_3nd_st_1(plugin_dask_opt, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the workflow level """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - - with Submitter(worker=plugin_dask_opt) as sub: - results = sub(wf) + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) - assert len(results) == 6 - assert results.outputs.out[0] == 39 - assert results.outputs.out[1] == 42 - assert results.outputs.out[5] == 70 + assert outputs.out[0] == 39 + assert outputs.out[1] == 42 + assert outputs.out[5] == 70 @pytest.mark.flaky(reruns=3) # when dask -def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): +def test_wf_3nd_ndst_1(plugin_dask_opt, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the tasks levels """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) - - with Submitter(worker=plugin_dask_opt) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin_dask_opt, cache_dir=tmp_path) - assert len(results.outputs.out) == 6 - assert results.outputs.out == [39, 42, 52, 56, 65, 70] + assert 
len(outputs.out) == 6 + assert outputs.out == [39, 42, 52, 56, 65, 70] -def test_wf_3nd_st_2(plugin, tmpdir): +def test_wf_3nd_st_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the workflow level """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("x") - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out[0][0] == 39 + assert outputs.out[0][1] == 52 + assert outputs.out[0][2] == 65 + assert outputs.out[1][0] == 42 + assert outputs.out[1][1] == 56 + assert outputs.out[1][2] == 70 - assert len(results) == 2 - assert results.outputs.out[0][0] == 39 - assert results.outputs.out[0][1] == 52 - assert results.outputs.out[0][2] == 65 - assert results.outputs.out[1][0] == 42 - assert results.outputs.out[1][1] == 56 - assert results.outputs.out[1][2] == 70 - -def test_wf_3nd_ndst_2(plugin, tmpdir): +def test_wf_3nd_ndst_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner on the tasks levels """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2x.x")) + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine("add2x.x"), name="mult" + ) return mult.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) - - with Submitter(cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results.outputs.out) == 2 - assert results.outputs.out[0] == [39, 52, 65] - assert results.outputs.out[1] == [42, 56, 70] + assert len(outputs.out) == 2 + assert outputs.out[0] == [39, 52, 65] + assert outputs.out[1] == [42, 56, 70] -def test_wf_3nd_st_3(plugin, tmpdir): +def test_wf_3nd_st_3(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the workflow level """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine("y") - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results) == 3 - assert results.outputs.out[0][0] == 39 - assert results.outputs.out[0][1] == 42 - assert results.outputs.out[1][0] == 52 - assert 
results.outputs.out[1][1] == 56 - assert results.outputs.out[2][0] == 65 - assert results.outputs.out[2][1] == 70 + assert outputs.out[0][0] == 39 + assert outputs.out[0][1] == 42 + assert outputs.out[1][0] == 52 + assert outputs.out[1][1] == 56 + assert outputs.out[2][0] == 65 + assert outputs.out[2][1] == 70 -def test_wf_3nd_ndst_3(plugin, tmpdir): +def test_wf_3nd_ndst_3(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and partial combiner (from the second task) on the tasks levels """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out).combine("add2y.x")) + mult = workflow.add( + Multiply(x=add2x.out, y=add2y.out).combine("add2y.x"), name="mult" + ) return mult.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) - - with Submitter(worker="debug", cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results.outputs.out) == 3 - assert results.outputs.out[0] == [39, 42] - assert results.outputs.out[1] == [52, 56] - assert results.outputs.out[2] == [65, 70] + assert len(outputs.out) == 3 + assert outputs.out[0] == [39, 42] + assert outputs.out[1] == [52, 56] + assert outputs.out[2] == [65, 70] -def test_wf_3nd_st_4(plugin, tmpdir): +def test_wf_3nd_st_4(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the workflow level """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) + worky = Worky().split(["x", "y"], x=[1, 2, 3], y=[11, 12]).combine(["x", "y"]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out[0] == 39 + assert outputs.out[1] == 42 + assert outputs.out[2] == 52 + assert outputs.out[3] == 56 + assert outputs.out[4] == 65 + assert outputs.out[5] == 70 - assert len(results) == 6 - assert results.outputs.out[0] == 39 - assert results.outputs.out[1] == 42 - assert results.outputs.out[2] == 52 - assert results.outputs.out[3] == 56 - assert results.outputs.out[4] == 65 - assert results.outputs.out[5] == 70 - -def test_wf_3nd_ndst_4(plugin, tmpdir): +def test_wf_3nd_ndst_4(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter and full combiner on the tasks levels """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add( @@ -1298,59 +1123,51 @@ def Workflow(x, y): ) return mult.out - wf = Workflow(x=[1, 2, 3], y=[11, 12]) + worky = Worky(x=[1, 2, 3], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, 
"\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert len(results.outputs.out) == 6 - assert results.outputs.out == [39, 42, 52, 56, 65, 70] + assert len(outputs.out) == 6 + assert outputs.out == [39, 42, 52, 56, 65, 70] -def test_wf_3nd_st_5(plugin, tmpdir): +def test_wf_3nd_st_5(plugin, tmp_path): """workflow with three tasks (A->C, B->C) and three fields in the splitter, splitter and partial combiner (from the second task) on the workflow level """ @workflow.define - def Workflow(x, y, z): + def Worky(x, y, z): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") addvar = workflow.add(FunAddVar3(a=add2x.out, b=add2y.out, c=z)) return addvar.out - wf = ( - Workflow() - .split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]) - .combine("y") + worky = ( + Worky().split(["x", "y", "z"], x=[2, 3], y=[11, 12], z=[10, 100]).combine("y") ) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results) == 4 - assert results.outputs.out[0][0] == 27 - assert results.outputs.out[0][1] == 28 - assert results.outputs.out[1][0] == 117 - assert results.outputs.out[1][1] == 118 - assert results.outputs.out[2][0] == 28 - assert results.outputs.out[2][1] == 29 - assert results.outputs.out[3][0] == 118 - assert results.outputs.out[3][1] == 119 + assert outputs.out[0][0] == 27 + assert outputs.out[0][1] == 28 + assert outputs.out[1][0] == 117 + assert outputs.out[1][1] == 118 + assert outputs.out[2][0] == 28 + assert outputs.out[2][1] == 29 + assert outputs.out[3][0] == 118 + assert outputs.out[3][1] == 119 -def test_wf_3nd_ndst_5(plugin, tmpdir): +def test_wf_3nd_ndst_5(plugin, tmp_path): """workflow with three tasks (A->C, B->C) and three fields in the splitter, all tasks have splitters and the last one has a partial combiner (from the 2nd) """ @workflow.define - def Workflow(x, y, z): + def Worky(x, y, z): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") addvar = workflow.add( @@ -1359,29 +1176,26 @@ def Workflow(x, y, z): return addvar.out - wf = Workflow(x=[2, 3], y=[11, 12], z=[10, 100]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[2, 3], y=[11, 12], z=[10, 100]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results.outputs.out) == 4 - assert results.outputs.out[0] == [27, 28] - assert results.outputs.out[1] == [117, 118] - assert results.outputs.out[2] == [28, 29] - assert results.outputs.out[3] == [118, 119] + assert len(outputs.out) == 4 + assert outputs.out[0] == [27, 28] + assert outputs.out[1] == [117, 118] + assert outputs.out[2] == [28, 29] + assert outputs.out[3] == [118, 119] # checking all directories -def test_wf_3nd_ndst_6(plugin, tmpdir): +def test_wf_3nd_ndst_6(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones and a combiner """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=y), name="add2y") mult = workflow.add( @@ -1391,23 +1205,20 @@ def Workflow(x, y): ) return mult.out - wf = Workflow(x=[1, 
2], y=[11, 12]) + worky = Worky(x=[1, 2], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out == [39, 56] - assert results.outputs.out == [39, 56] - -def test_wf_3nd_ndst_7(plugin, tmpdir): +def test_wf_3nd_ndst_7(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, the third one uses scalar splitter from the previous ones """ @workflow.define - def Workflow(x): + def Worky(x): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2().split("x", x=x), name="add2y") mult = workflow.add( @@ -1415,24 +1226,21 @@ def Workflow(x): ) return mult.out - wf = Workflow(x=[1, 2]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [9, 16] + assert outputs.out == [9, 16] # workflows with structures A -> B -> C with multiple connections -def test_wf_3nd_8(tmpdir): +def test_wf_3nd_8(tmp_path): """workflow with three tasks A->B->C vs two tasks A->C with multiple connections""" @workflow.define(outputs=["out1", "out2", "out1a", "out2a"]) - def Workflow(zip): + def Worky(zip): iden2flds_1 = workflow.add( Identity2Flds(x2="Hoi").split("x1", x1=zip), name="iden2flds_1" @@ -1453,107 +1261,109 @@ def Workflow(zip): return iden2flds_2.out1, iden2flds_2.out2, iden2flds_2a.out1, iden2flds_2a.out2 - wf = Workflow(zip=[["test1", "test3", "test5"], ["test2", "test4", "test6"]]) + worky = Worky(zip=[["test1", "test3", "test5"], ["test2", "test4", "test6"]]) with Submitter(worker="cf") as sub: - res = sub(wf) + res = sub(worky) assert ( res.outputs.out1 == res.outputs.out1a == [["test1", "test3", "test5"], ["test2", "test4", "test6"]] ) - assert res.outputs.out2 == res.output.out2a == ["Hoi", "Hoi"] + assert res.outputs.out2 == res.outputs.out2a == ["Hoi", "Hoi"] # workflows with Left and Right part in splitters A -> B (L&R parts of the splitter) -def test_wf_ndstLR_1(plugin, tmpdir): +def test_wf_ndstLR_1(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has its own simple splitter and the Left part from the first task should be added """ @workflow.define - def Workflow(x, y): - add2 = workflow.add(Add2().split("x", x=x)) - mult = workflow.add(Multiply(x=add2.out).split("y", y=y)) + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + mult = workflow.add(Multiply(x=add2.out).split("y", y=y), name="mult") return mult.out - wf = Workflow(x=[1, 2], y=[11, 12]) + worky = Worky(x=[1, 2], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.mult.state.splitter == ["_add2", "mult.y"] - assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_add2", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["add2.x", "mult.y", "*"] # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert 
results.outputs.out == [33, 36, 44, 48] + assert outputs.out == [33, 36, 44, 48] -def test_wf_ndstLR_1a(plugin, tmpdir): +def test_wf_ndstLR_1a(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has splitter that has Left part (from previous state) and the Right part (it's own splitter) """ @workflow.define - def Workflow(x, y): - add2 = workflow.add(Add2().split("x", x=x)) - mult = workflow.add(Multiply().split(["_add2", "y"], x=add2.out, y=y)) + def Worky(x, y): + add2 = workflow.add(Add2().split("x", x=x), name="add2") + mult = workflow.add( + Multiply().split(["_add2", "y"], x=add2.out, y=y), name="mult" + ) return mult.out - wf = Workflow(x=[1, 2], y=[11, 12]) + worky = Worky(x=[1, 2], y=[11, 12]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.mult.state.splitter == ["_add2", "mult.y"] - assert wf.mult.state.splitter_rpn == ["add2.x", "mult.y", "*"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_add2", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["add2.x", "mult.y", "*"] # expected: [({"add2.x": 1, "mult.y": 11}, 33), ({"add2.x": 1, "mult.y": 12}, 36), # ({"add2.x": 2, "mult.y": 11}, 44), ({"add2.x": 2, "mult.y": 12}, 48)] - assert results.outputs.out == [33, 36, 44, 48] + assert outputs.out == [33, 36, 44, 48] -def test_wf_ndstLR_2(plugin, tmpdir): +def test_wf_ndstLR_2(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has its own outer splitter and the Left part from the first task should be added """ @workflow.define - def Workflow(x, y, z): - add2 = workflow.add(Add2().split("x", x=x)) + def Worky(x, y, z): + add2 = workflow.add(Add2().split("x", x=x), name="add2") addvar = workflow.add(FunAddVar3(a=add2.out).split(["b", "c"], b=y, c=z)) return addvar.out - wf = Workflow(x=[1, 2, 3], y=[10, 20], z=[100, 200]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[10, 20], z=[100, 200]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] - assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] + wf = Workflow.construct(worky) + assert wf["addvar"].state.splitter == ["_add2", ["addvar.b", "addvar.c"]] + assert wf["addvar"].state.splitter_rpn == [ + "add2.x", + "addvar.b", + "addvar.c", + "*", + "*", + ] # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] 
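For reference, the expected values quoted in the comment above can be reproduced with plain Python, assuming (as the quoted pairs imply) that Add2 adds 2 to its input and FunAddVar3 simply returns a + b + c; this is an illustrative sketch, not part of the patch:

    # Standalone check of the first few expected values listed in the comment above.
    # Ordering follows the outer split: x outermost, then b, then c.
    expected = [(x + 2) + b + c for x in [1, 2, 3] for b in [10, 20] for c in [100, 200]]
    assert expected[:4] == [113, 213, 123, 223]
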
- assert results.outputs.out == [ + assert outputs.out == [ 113, 213, 123, @@ -1569,38 +1379,42 @@ def Workflow(x, y, z): ] -def test_wf_ndstLR_2a(plugin, tmpdir): +def test_wf_ndstLR_2a(plugin, tmp_path): """Test workflow with 2 tasks, splitters on tasks levels The second task has splitter that has Left part (from previous state) and the Right part (it's own outer splitter) """ @workflow.define - def Workflow(x, y, z): - add2 = workflow.add(Add2().split("x", x=x)) + def Worky(x, y, z): + add2 = workflow.add(Add2().split("x", x=x), name="add2") addvar = workflow.add( FunAddVar3(a=add2.out).split(["_add2", ["b", "c"]], b=y, c=z) ) return addvar.out - wf = Workflow(x=[1, 2, 3], y=[10, 20], z=[100, 200]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2, 3], y=[10, 20], z=[100, 200]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) # checking if the splitter is created properly - assert wf.addvar.state.splitter == ["_add2", ["addvar.b", "addvar.c"]] - assert wf.addvar.state.splitter_rpn == ["add2.x", "addvar.b", "addvar.c", "*", "*"] + wf = Workflow.construct(worky) + assert wf["addvar"].state.splitter == ["_add2", ["addvar.b", "addvar.c"]] + assert wf["addvar"].state.splitter_rpn == [ + "add2.x", + "addvar.b", + "addvar.c", + "*", + "*", + ] # expected: [({"add2.x": 1, "mult.b": 10, "mult.c": 100}, 113), # ({"add2.x": 1, "mult.b": 10, "mult.c": 200}, 213), # ({"add2.x": 1, "mult.b": 20, "mult.c": 100}, 123), # ({"add2.x": 1, "mult.b": 20, "mult.c": 200}, 223), # ...] - assert results.outputs.out == [ + assert outputs.out == [ 113, 213, 123, @@ -1619,111 +1433,103 @@ def Workflow(x, y, z): # workflows with inner splitters A -> B (inner spl) -def test_wf_ndstinner_1(plugin, tmpdir): +def test_wf_ndstinner_1(plugin, tmp_path): """workflow with 2 tasks, the second task has inner splitter """ @workflow.define(outputs=["out_list", "out"]) - def Workflow(x: int): + def Worky(x: int): list = workflow.add(ListOutput(x=x)) - add2 = workflow.add(Add2().split("x", x=list.out)) + add2 = workflow.add(Add2().split("x", x=list.out), name="add2") return list.out, add2.out - wf = Workflow(x=1) # - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=1) - assert not results.errored, "\n".join(results.errors["error message"]) + wf = Workflow.construct(worky) + assert wf["add2"].state.splitter == "add2.x" + assert wf["add2"].state.splitter_rpn == ["add2.x"] - assert wf.add2.state.splitter == "add2.x" - assert wf.add2.state.splitter_rpn == ["add2.x"] + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out_list == [1, 2, 3] - assert results.outputs.out == [3, 4, 5] + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [3, 4, 5] -def test_wf_ndstinner_2(plugin, tmpdir): +def test_wf_ndstinner_2(plugin, tmp_path): """workflow with 2 tasks, the second task has two inputs and inner splitter from one of the input """ @workflow.define(outputs=["out_list", "out"]) - def Workflow(x, y): + def Worky(x, y): list = workflow.add(ListOutput(x=x)) - mult = workflow.add(Multiply(y=y).split("x", x=list.out)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out), name="mult") return list.out, mult.out - wf = Workflow(x=1, y=10) # - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=1, y=10) # - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = 
worky(worker=plugin, cache_dir=tmp_path) - assert wf.mult.state.splitter == "mult.x" - assert wf.mult.state.splitter_rpn == ["mult.x"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == "mult.x" + assert wf["mult"].state.splitter_rpn == ["mult.x"] - assert results.outputs.out_list == [1, 2, 3] - assert results.outputs.out == [10, 20, 30] + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [10, 20, 30] -def test_wf_ndstinner_3(plugin, tmpdir): +def test_wf_ndstinner_3(plugin, tmp_path): """workflow with 2 tasks, the second task has two inputs and outer splitter that includes an inner field """ @workflow.define(outputs=["out_list", "out"]) - def Workflow(x, y): + def Worky(x, y): list = workflow.add(ListOutput(x=x)) - mult = workflow.add(Multiply().split(["x", "y"], x=list.out, y=y)) + mult = workflow.add(Multiply().split(["x", "y"], x=list.out, y=y), name="mult") return list.out, mult.out - wf = Workflow(x=1, y=[10, 100]) + worky = Worky(x=1, y=[10, 100]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["mult.x", "mult.y"] + assert wf["mult"].state.splitter_rpn == ["mult.x", "mult.y", "*"] - assert wf.mult.state.splitter == ["mult.x", "mult.y"] - assert wf.mult.state.splitter_rpn == ["mult.x", "mult.y", "*"] + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [10, 100, 20, 200, 30, 300] - assert results.outputs.out_list == [1, 2, 3] - assert results.outputs.out == [10, 100, 20, 200, 30, 300] - -def test_wf_ndstinner_4(plugin, tmpdir): +def test_wf_ndstinner_4(plugin, tmp_path): """workflow with 3 tasks, the second task has two inputs and inner splitter from one of the input, the third task has no its own splitter """ @workflow.define(outputs=["out_list", "out"]) - def Workflow(x, y): + def Worky(x, y): list = workflow.add(ListOutput(x=x)) - mult = workflow.add(Multiply(y=y).split("x", x=list.out)) - add2 = workflow.add(Add2(x=mult.out)) + mult = workflow.add(Multiply(y=y).split("x", x=list.out), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return list.out, add2.out - wf = Workflow(x=1, y=10) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=1, y=10) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.mult.state.splitter == "mult.x" - assert wf.mult.state.splitter_rpn == ["mult.x"] - assert wf.add2.state.splitter == "_mult" - assert wf.add2.state.splitter_rpn == ["mult.x"] + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == "mult.x" + assert wf["mult"].state.splitter_rpn == ["mult.x"] + assert wf["add2"].state.splitter == "_mult" + assert wf["add2"].state.splitter_rpn == ["mult.x"] - assert results.outputs.out_list == [1, 2, 3] - assert results.outputs.out == [12, 22, 32] + assert outputs.out_list == [1, 2, 3] + assert outputs.out == [12, 22, 32] -def test_wf_ndstinner_5(plugin, tmpdir): +def test_wf_ndstinner_5(plugin, tmp_path): """workflow with 3 tasks, the second task has two inputs and inner splitter from one of the input, (inner input come from the first task that has its own splitter, @@ -1732,23 +1538,21 @@ def test_wf_ndstinner_5(plugin, tmpdir): """ @workflow.define(outputs=["out_list", "out_mult", "out_add"]) - def Workflow(x, y, b): + def 
Worky(x, y, b): list = workflow.add(ListOutput().split("x", x=x)) - mult = workflow.add(Multiply().split(["y", "x"], x=list.out, y=y)) + mult = workflow.add(Multiply().split(["y", "x"], x=list.out, y=y), name="mult") addvar = workflow.add(FunAddVar(a=mult.out).split("b", b=b)) return list.out, mult.out, addvar.out - wf = Workflow(x=[1, 2], y=[10, 100], b=[3, 5]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=[10, 100], b=[3, 5]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf.mult.state.splitter == ["_list", ["mult.y", "mult.x"]] - assert wf.mult.state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] - assert wf.addvar.state.splitter == ["_mult", "addvar.b"] - assert wf.addvar.state.splitter_rpn == [ + wf = Workflow.construct(worky) + assert wf["mult"].state.splitter == ["_list", ["mult.y", "mult.x"]] + assert wf["mult"].state.splitter_rpn == ["list.x", "mult.y", "mult.x", "*", "*"] + assert wf["addvar"].state.splitter == ["_mult", "addvar.b"] + assert wf["addvar"].state.splitter_rpn == [ "list.x", "mult.y", "mult.x", @@ -1758,8 +1562,8 @@ def Workflow(x, y, b): "*", ] - assert results.outputs.out_list == [[1, 2, 3], [2, 4, 6]] - assert results.outputs.out_mult == [ + assert outputs.out_list == [[1, 2, 3], [2, 4, 6]] + assert outputs.out_mult == [ 10, 20, 30, @@ -1773,7 +1577,7 @@ def Workflow(x, y, b): 400, 600, ] - assert results.outputs.out_add == [ + assert outputs.out_add == [ 13, 15, 23, @@ -1804,127 +1608,111 @@ def Workflow(x, y, b): # workflow that have some single values as the input -def test_wf_st_singl_1(plugin, tmpdir): +def test_wf_st_singl_1(plugin, tmp_path): """workflow with two tasks, only one input is in the splitter and combiner""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow().split("x", x=[1, 2], y=11).combine("x") - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split("x", x=[1, 2], y=11).combine("x") - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out[0] == 13 - assert results.outputs.out[1] == 24 + assert outputs.out[0] == 13 + assert outputs.out[1] == 24 -def test_wf_ndst_singl_1(plugin, tmpdir): +def test_wf_ndst_singl_1(plugin, tmp_path): """workflow with two tasks, outer splitter and combiner on tasks level; only one input is part of the splitter, the other is a single value """ @workflow.define - def Workflow(x, y): + def Worky(x, y): mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult") - add2 = workflow.add(Add2(x=mult.out).combine("mult.x")) + add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=11) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[1, 2], y=11) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [13, 24] + assert outputs.out == [13, 24] -def test_wf_st_singl_2(plugin, tmpdir): +def test_wf_st_singl_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous 
tasks, splitter on the workflow level only one input is part of the splitter, the other is a single value """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2(x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow().split("x", x=[1, 2, 3], y=11) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky().split("x", x=[1, 2, 3], y=11) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert len(results) == 3 - assert results.outputs.out[0] == 39 - assert results.outputs.out[1] == 52 - assert results.outputs.out[2] == 65 + assert outputs.out[0] == 39 + assert outputs.out[1] == 52 + assert outputs.out[2] == 65 -def test_wf_ndst_singl_2(plugin, tmpdir): +def test_wf_ndst_singl_2(plugin, tmp_path): """workflow with three tasks, third one connected to two previous tasks, splitter on the tasks levels only one input is part of the splitter, the other is a single value """ @workflow.define - def Workflow(x, y): + def Worky(x, y): add2x = workflow.add(Add2().split("x", x=x), name="add2x") add2y = workflow.add(Add2(x=y), name="add2y") - mult = workflow.add(Multiply(x=add2x.out, y=add2y.out)) + mult = workflow.add(Multiply(x=add2x.out, y=add2y.out), name="mult") return mult.out - wf = Workflow(x=[1, 2, 3], y=11) + worky = Worky(x=[1, 2, 3], y=11) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert len(outputs.out) == 3 + assert outputs.out == [39, 52, 65] - assert len(results.outputs.out) == 3 - assert results.outputs.out == [39, 52, 65] +# workflows with structures worky(A) -# workflows with structures wf(A) - -def test_wfasnd_1(plugin, tmpdir): +def test_wfasnd_1(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter """ @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x)) return wfnd.out - wf = Workflow(x=2) + worky = Worky(x=2) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out == 4 - assert results.outputs.out == 4 - -def test_wfasnd_wfinp_1(plugin, tmpdir): +def test_wfasnd_wfinp_1(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter input set for the main workflow @@ -1932,28 +1720,26 @@ def test_wfasnd_wfinp_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow(x=2) - - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert not results.errored, "\n".join(results.errors["error message"]) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf._checksum == checksum_before + wf = Workflow.construct(worky) + assert 
worky._hash == checksum_before - assert results.outputs.out == 4 + assert outputs.out == 4 -def test_wfasnd_wfndupdate(plugin, tmpdir): +def test_wfasnd_wfndupdate(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter wfasnode input is updated to use the main workflow input @@ -1961,25 +1747,22 @@ def test_wfasnd_wfndupdate(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x)) return wfnd.out - wf = Workflow(x=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == 5 + assert outputs.out == 5 -def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): +def test_wfasnd_wfndupdate_rerun(plugin, tmp_path): """workflow as a node workflow-node with one task and no splitter wfasnode is run first and later is @@ -1988,45 +1771,39 @@ def test_wfasnd_wfndupdate_rerun(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out wfnd = Wfnd(x=2) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: sub(wfnd) @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x)) return wfnd.out - wf = Workflow(x=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == 5 + assert outputs.out == 5 # adding another layer of workflow @workflow.define - def WorkflowO(x): - wf = workflow.add(Workflow(x=3)) - return wf.out - - wf_o = WorkflowO(x=4) + def WorkyO(x): + worky = workflow.add(Worky(x=3)) + return wf["out"] - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf_o) + wf_o = WorkyO(x=4) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = wf_o(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == 6 + assert outputs.out == 6 -def test_wfasnd_st_1(plugin, tmpdir): +def test_wfasnd_st_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for wfnd @@ -2034,28 +1811,26 @@ def test_wfasnd_st_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x).split(x=x)) return wfnd.out - wf = Workflow(x=[2, 4]) - - checksum_before = wf._checksum - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[2, 4]) - assert not results.errored, "\n".join(results.errors["error message"]) + checksum_before = worky._hash + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert wf._checksum == checksum_before + wf = Workflow.construct(worky) + assert worky._hash == checksum_before - assert results.outputs.out == [4, 6] + assert outputs.out == [4, 6] -def test_wfasnd_st_updatespl_1(plugin, tmpdir): +def test_wfasnd_st_updatespl_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for wfnd is set after add @@ -2063,25 +1838,22 @@ def 
test_wfasnd_st_updatespl_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x).split(x=x)) return wfnd.out - wf = Workflow(x=[2, 4]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[2, 4]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [4, 6] + assert outputs.out == [4, 6] -def test_wfasnd_ndst_1(plugin, tmpdir): +def test_wfasnd_ndst_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for node @@ -2089,25 +1861,22 @@ def test_wfasnd_ndst_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2().split("x", x=x)) + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow(x=[2, 4]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=[2, 4]) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == [4, 6] + assert outputs.out == [4, 6] -def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): +def test_wfasnd_ndst_updatespl_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for node added after add @@ -2115,25 +1884,22 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2().split("x", x=x)) + add2 = workflow.add(Add2().split("x", x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow(x=[2, 4]) + worky = Worky(x=[2, 4]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out == [4, 6] - assert results.outputs.out == [4, 6] - -def test_wfasnd_wfst_1(plugin, tmpdir): +def test_wfasnd_wfst_1(plugin, tmp_path): """workflow as a node workflow-node with one task, splitter for the main workflow @@ -2141,30 +1907,28 @@ def test_wfasnd_wfst_1(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow().split("x", x=[2, 4]) + worky = Worky().split("x", x=[2, 4]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out[0] == 4 - assert results.outputs.out[1] == 6 + assert outputs.out[0] == 4 + assert outputs.out[1] == 6 -# workflows with structures wf(A) -> B +# workflows with structures worky(A) -> B -def test_wfasnd_st_2(plugin, tmpdir): +def test_wfasnd_st_2(plugin, tmp_path): """workflow as a node, the main workflow has two tasks, splitter for wfnd @@ -2172,27 +1936,25 @@ def test_wfasnd_st_2(plugin, tmpdir): @workflow.define def Wfnd(x, y): - mult = 
workflow.add(Multiply().split(("x", "y"), x=x, y=y)) + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") return mult.out @workflow.define - def Workflow(x, y): + def Worky(x, y): wfnd = workflow.add(Wfnd(x=x, y=y)) - add2 = workflow.add(Add2(x=wfnd.out)) + add2 = workflow.add(Add2(x=wfnd.out), name="add2") return add2.out - wf = Workflow(x=[2, 4], y=[1, 10]) + worky = Worky(x=[2, 4], y=[1, 10]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out == [4, 42] + assert outputs.out == [4, 42] -def test_wfasnd_wfst_2(plugin, tmpdir): +def test_wfasnd_wfst_2(plugin, tmp_path): """workflow as a node, the main workflow has two tasks, splitter for the main workflow @@ -2200,31 +1962,29 @@ def test_wfasnd_wfst_2(plugin, tmpdir): @workflow.define def Wfnd(x, y): - mult = workflow.add(Multiply(x=x, y=y)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") return mult.out @workflow.define - def Workflow(x, y): + def Worky(x, y): wfnd = workflow.add(Wfnd(x=x, y=y)) - add2 = workflow.add(Add2(x=wfnd.out)) + add2 = workflow.add(Add2(x=wfnd.out), name="add2") return add2.out - wf = Workflow().split(("x", "y"), x=[2, 4], y=[1, 10]) + worky = Worky().split(("x", "y"), x=[2, 4], y=[1, 10]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out[0] == 4 - assert results.outputs.out[1] == 42 + assert outputs.out[0] == 4 + assert outputs.out[1] == 42 -# workflows with structures A -> wf(B) +# workflows with structures A -> worky(B) -def test_wfasnd_ndst_3(plugin, tmpdir): +def test_wfasnd_ndst_3(plugin, tmp_path): """workflow as the second node, the main workflow has two tasks, splitter for the first task @@ -2232,27 +1992,25 @@ def test_wfasnd_ndst_3(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y)) + def Worky(x, y): + mult = workflow.add(Multiply().split(("x", "y"), x=x, y=y), name="mult") wfnd = workflow.add(Wfnd(mult.out)) return wfnd.out - wf = Workflow(x=[2, 4], y=[1, 10]) + worky = Worky(x=[2, 4], y=[1, 10]) - with Submitter(cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(cache_dir=tmp_path, plugin=plugin) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out == [4, 42] + assert outputs.out == [4, 42] -def test_wfasnd_wfst_3(plugin, tmpdir): +def test_wfasnd_wfst_3(plugin, tmp_path): """workflow as the second node, the main workflow has two tasks, splitter for the main workflow @@ -2260,33 +2018,31 @@ def test_wfasnd_wfst_3(plugin, tmpdir): @workflow.define def Wfnd(x): - add2 = workflow.add(Add2(x=x)) + add2 = workflow.add(Add2(x=x), name="add2") return add2.out @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") wfnd = workflow.add(Wfnd(mult.out)) return wfnd.out - 
wf = Workflow().split(("x", "y"), x=[2, 4], y=[1, 10]) + worky = Worky().split(("x", "y"), x=[2, 4], y=[1, 10]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out[0] == 4 - assert results.outputs.out[1] == 42 + assert outputs.out[0] == 4 + assert outputs.out[1] == 42 # workflows with structures wfns(A->B) -def test_wfasnd_4(plugin, tmpdir): +def test_wfasnd_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks and no splitter """ @@ -2298,21 +2054,18 @@ def Wfnd(x): return add2_2nd.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=2)) return wfnd.out - wf = Workflow(x=2) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert results.outputs.out == 6 + assert outputs.out == 6 -def test_wfasnd_ndst_4(plugin, tmpdir): +def test_wfasnd_ndst_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks, splitter for node @@ -2325,21 +2078,18 @@ def Wfnd(x): return add2_2nd.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow(x=[2, 4]) + worky = Worky(x=[2, 4]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) + assert outputs.out == [6, 8] - assert results.outputs.out == [6, 8] - -def test_wfasnd_wfst_4(plugin, tmpdir): +def test_wfasnd_wfst_4(plugin, tmp_path): """workflow as a node workflow-node with two tasks, splitter for the main workflow @@ -2352,93 +2102,85 @@ def Wfnd(x): return add2_2nd.out @workflow.define - def Workflow(x): + def Worky(x): wfnd = workflow.add(Wfnd(x=x)) return wfnd.out - wf = Workflow().split("x", x=[2, 4]) + worky = Worky().split("x", x=[2, 4]) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - # assert wf.output_dir.exists() + # assert wf["output_dir"].exists() - assert results.outputs.out[0] == 6 - assert results.outputs.out[1] == 8 + assert outputs.out[0] == 6 + assert outputs.out[1] == 8 # Testing caching @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachedir(plugin, tmpdir): - """wf with provided cache_dir using pytest tmpdir""" - cache_dir = tmpdir.mkdir("test_wf_cache_1") +def test_wf_nostate_cachedir(plugin, tmp_path): + """worky with provided cache_dir using pytest tmp_path""" + cache_dir = tmp_path.mkdir("test_wf_cache_1") @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 8 == results.outputs.out + assert 8 == outputs.out 
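A note on the tmp_path conversions used in the caching tests above and below: pytest's tmp_path fixture is a pathlib.Path, not a py.path.local, so it provides neither mkdir("name") (pathlib's mkdir() takes no directory name and returns None) nor chdir(). A minimal sketch of the pathlib-idiomatic equivalents, with an assumed helper name, for illustration only:

    import os
    from pathlib import Path

    def make_cache_dir(tmp_path: Path, name: str) -> Path:
        # Build the child path first, then create it; Path.mkdir() returns None,
        # so the directory path has to be kept separately.
        cache_dir = tmp_path / name
        cache_dir.mkdir()
        return cache_dir

    # Changing the working directory (pathlib has no .chdir() method):
    # os.chdir(tmp_path)
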
shutil.rmtree(cache_dir) @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachedir_relativepath(tmpdir, plugin): - """wf with provided cache_dir as relative path""" - tmpdir.chdir() +def test_wf_nostate_cachedir_relativepath(tmp_path, plugin): + """worky with provided cache_dir as relative path""" + tmp_path.chdir() cache_dir = "test_wf_cache_2" - tmpdir.mkdir(cache_dir) + tmp_path.mkdir(cache_dir) @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=2, y=3) - assert not results.errored, "\n".join(results.errors["error message"]) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert 8 == results.outputs.out + assert 8 == outputs.out shutil.rmtree(cache_dir) @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations(plugin, tmpdir): +def test_wf_nostate_cachelocations(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2446,19 +2188,19 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2471,32 +2213,30 @@ def Workflow2(x, y): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking if the second wf didn't run again - assert wf1.output_dir.exists() - assert not wf2.output_dir.exists() + # checking if the second worky didn't run again @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_a(plugin, tmpdir): +def test_wf_nostate_cachelocations_a(plugin, tmp_path): """ the same as previous test, but workflows names differ; the task should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created + but the worky itself is triggered and the new output dir is created """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = 
tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2504,19 +2244,19 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2527,35 +2267,35 @@ def Workflow2(x, y): if not sys.platform.startswith("win") and plugin == "cf": # checking execution time (second one should be quick) assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # checking if both wf.output_dir are created + # checking if both wf["output_dir"] are created assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_b(plugin, tmpdir): +def test_wf_nostate_cachelocations_b(plugin, tmp_path): """ the same as previous test, but the 2nd workflows has two outputs (connected to the same task output); the task should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created + but the worky itself is triggered and the new output dir is created """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2563,19 +2303,19 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define("out_pr") - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, 
"\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2588,32 +2328,32 @@ def Workflow2(x, y): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking if the second wf didn't run again + # checking if the second worky didn't run again assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_setoutputchange(plugin, tmpdir): +def test_wf_nostate_cachelocations_setoutputchange(plugin, tmp_path): """ - the same as previous test, but wf output names differ, + the same as previous test, but worky output names differ, the tasks should not be run and it should be fast, - but the wf itself is triggered and the new output dir is created - (the second wf has updated name in its Output) + but the worky itself is triggered and the new output dir is created + (the second worky has updated name in its Output) """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define(outputs=["out1"]) - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out # out1 - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2621,18 +2361,18 @@ def Workflow1(x, y): assert 8 == results1.outputs.out1 @workflow.define(outputs=["out2"]) - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out # out2 - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2641,34 +2381,34 @@ def Workflow2(x, y): # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": - # checking execution time (the second wf should be fast, nodes do not have to rerun) + # checking execution time (the second worky should be fast, nodes do not have to rerun) assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # both wf output_dirs should be created + # both worky output_dirs should be created assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmpdir): +def test_wf_nostate_cachelocations_setoutputchange_a(plugin, tmp_path): """ - the same as previous test, but wf names and output names differ, + the same as previous test, but worky names and output names differ, """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define(outputs=["out1"]) - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out # out1 - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2676,18 +2416,18 @@ def Workflow1(x, y): assert 8 == results1.outputs.out1 @workflow.define(outputs=["out2"]) - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2697,34 +2437,34 @@ def Workflow2(x, y): # for win and dask/slurm the time for dir creation etc. 
might take much longer if not sys.platform.startswith("win") and plugin == "cf": assert t1 > 2 - # testing relative values (windows or slurm takes much longer to create wf itself) + # testing relative values (windows or slurm takes much longer to create worky itself) assert t2 < max(1, t1 - 1) - # both wf output_dirs should be created + # both worky output_dirs should be created assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_forcererun(plugin, tmpdir): +def test_wf_nostate_cachelocations_forcererun(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2732,17 +2472,17 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: - results2 = sub(wf2, rerun=True) + results2 = sub(worky2, rerun=True) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -2755,31 +2495,31 @@ def Workflow2(x, y): assert t1 > 2 assert t2 > 2 - # checking if the second wf didn't run again + # checking if the second worky didn't run again assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmpdir): +def test_wf_nostate_cachelocations_wftaskrerun_propagateTrue(plugin, tmp_path): """ Two identical wfs with provided cache_dir and cache_locations for the second one; - submitter doesn't have rerun, but the second wf has rerun=True, + submitter doesn't have rerun, but the second worky has rerun=True, propagate_rerun is True as default, so everything should be rerun """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, 
"\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2787,25 +2527,25 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2, rerun=True) + results2 = sub(worky2, rerun=True) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out - # checking if the second wf runs again + # checking if the second worky runs again assert results1.output_dir != results2.output_dir # everything has to be recomputed @@ -2819,27 +2559,27 @@ def Workflow2(x, y): @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmpdir): +def test_wf_nostate_cachelocations_wftaskrerun_propagateFalse(plugin, tmp_path): """ Two identical wfs with provided cache_dir and cache_locations for the second one; - submitter doesn't have rerun, but the second wf has rerun=True, - propagate_rerun is set to False, so wf will be triggered, + submitter doesn't have rerun, but the second worky has rerun=True, + propagate_rerun is set to False, so worky will be triggered, but tasks will not have rerun, so will use the previous results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2847,25 +2587,25 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2, rerun=True, propagate_rerun=False) + results2 = sub(worky2, rerun=True, propagate_rerun=False) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 assert 8 == results2.outputs.out - # checking if the second wf runs again + # checking if the second worky runs again assert results1.output_dir != results2.output_dir # for win and dask/slurm the time for dir creation etc. 
might take much longer @@ -2880,26 +2620,26 @@ def Workflow2(x, y): @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmpdir): +def test_wf_nostate_cachelocations_taskrerun_wfrerun_propagateFalse(plugin, tmp_path): """ - Two identical wfs with provided cache_dir, and cache_locations for the second wf; - submitter doesn't have rerun, but wf has rerun=True, + Two identical wfs with provided cache_dir, and cache_locations for the second worky; + submitter doesn't have rerun, but worky has rerun=True, since propagate_rerun=False, only tasks that have rerun=True will be rerun """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -2907,20 +2647,20 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - # rerun on the task level needed (wf.propagate_rerun is False) - add2 = workflow.add(Add2Wait(x=mult.out, rerun=True)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + # rerun on the task level needed (wf["propagate_rerun"] is False) + add2 = workflow.add(Add2Wait(x=mult.out, rerun=True), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: results2 = sub( - wf2, rerun=True, propagate_rerun=False + worky2, rerun=True, propagate_rerun=False ) # rerun will not be propagated to each task) t2 = time.time() - t0 @@ -2939,125 +2679,125 @@ def Workflow2(x, y): @pytest.mark.flaky(reruns=3) -def test_wf_nostate_nodecachelocations(plugin, tmpdir): +def test_wf_nostate_nodecachelocations(plugin, tmp_path): """ Two wfs with different input, but the second node has the same input; - the second wf has cache_locations and should recompute the wf, + the second worky has cache_locations and should recompute the worky, but without recomputing the second node """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x): + def Worky1(x): ten = workflow.add(Ten(x=x)) - add2 = workflow.add(Add2(x=ten.out)) + add2 = workflow.add(Add2(x=ten.out), name="add2") return add2.out - wf1 = Workflow1(x=3) + worky1 = Worky1(x=3) with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) assert 12 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): ten = workflow.add(Ten(x=x)) - add2 = workflow.add(Add2(x=ten.out)) + add2 = workflow.add(Add2(x=ten.out), name="add2") return add2.out - wf2 = Workflow2(x=2) + worky2 = Worky2(x=2) with 
Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) assert 12 == results2.outputs.out - # checking if the second wf runs again, but runs only one task + # checking if the second worky runs again, but runs only one task assert results1.output_dir != results2.output_dir - # the second wf should rerun one task + # the second worky should rerun one task assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_nostate_nodecachelocations_upd(plugin, tmpdir): +def test_wf_nostate_nodecachelocations_upd(plugin, tmp_path): """ Two wfs with different input, but the second node has the same input; - the second wf has cache_locations (set after adding tasks) and should recompute, + the second worky has cache_locations (set after adding tasks) and should recompute, but without recomputing the second node """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x): + def Worky1(x): ten = workflow.add(Ten(x=x)) - add2 = workflow.add(Add2(x=ten.out)) + add2 = workflow.add(Add2(x=ten.out), name="add2") return add2.out - wf1 = Workflow1(x=3) + worky1 = Worky1(x=3) with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) assert 12 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): ten = workflow.add(Ten(x=x)) - add2 = workflow.add(Add2(x=ten.out)) + add2 = workflow.add(Add2(x=ten.out), name="add2") return add2.out - wf2 = Workflow2(x=2) + worky2 = Worky2(x=2) # updating cache_locations after adding the tasks - wf2.cache_locations = cache_dir1 + worky2.cache_locations = cache_dir1 with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) assert 12 == results2.outputs.out - # checking if the second wf runs again, but runs only one task + # checking if the second worky runs again, but runs only one task assert results1.output_dir != results2.output_dir - # the second wf should have only one task run + # the second worky should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations(plugin, tmpdir): +def test_wf_state_cachelocations(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky1 = 
Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3066,19 +2806,19 @@ def Workflow1(x, y): assert results1.outputs.out[1] == 82 @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3093,37 +2833,35 @@ def Workflow2(x, y): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf didn't run again + + # checking if the second worky didn't run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: + + for odir in worky2.output_dir: assert not odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations_forcererun(plugin, tmpdir): +def test_wf_state_cachelocations_forcererun(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3132,17 +2870,17 @@ def Workflow1(x, y): assert results1.outputs.out[1] == 82 @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: - results2 = sub(wf2, rerun=True) + results2 = sub(worky2, rerun=True) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3157,38 +2895,33 @@ def Workflow2(x, y): assert t2 > 2 # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf run again + + # 
checking if the second worky run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: - assert odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_cachelocations_updateinp(plugin, tmpdir): +def test_wf_state_cachelocations_updateinp(plugin, tmp_path): """ Two identical wfs (with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results (the lazy input of the node is updated to the correct one, - i.e. the same as in wf1, after adding the node to the wf) + i.e. the same as in worky1, after adding the node to the worky) """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3197,18 +2930,18 @@ def Workflow1(x, y): assert results1.outputs.out[1] == 82 @workflow.define - def Workflow2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky2(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3223,88 +2956,77 @@ def Workflow2(x, y): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir - for odir in wf1.output_dir: - assert odir.exists() - # checking if the second wf didn't run again + + # checking if the second worky didn't run again # checking all directories - assert wf2.output_dir - for odir in wf2.output_dir: - assert not odir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): +def test_wf_state_n_nostate_cachelocations(plugin, tmp_path): """ Two wfs with provided cache_dir, the first one has no state, the second has; - the second wf has cache_locations and should not recompute only one element + the second worky has cache_locations and should not recompute only one element """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) 
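All of the cache_locations tests in this stretch exercise the same caching contract: a workflow submitted with cache_dir=cache_dir2 and cache_locations=cache_dir1 looks up task checksums in cache_dir1 first and only recomputes when the inputs or graph differ, or when rerun=True is passed to the submitter call. A minimal stand-alone sketch of that pattern follows; the import paths and the Multiply/Add2Wait helper tasks are assumptions (they are not shown in this hunk), while the Submitter and cache_locations usage mirrors the tests themselves.

# Illustrative sketch only -- import paths and helper tasks are assumed,
# not taken from this patch; the Submitter usage follows the tests above.
from pathlib import Path
import tempfile

from pydra.design import workflow                         # assumed import path
from pydra.engine.submitter import Submitter               # assumed import path
from pydra.engine.tests.utils import Multiply, Add2Wait    # assumed helper tasks

@workflow.define
def Worky(x, y):
    mult = workflow.add(Multiply(x=x, y=y), name="mult")
    add2 = workflow.add(Add2Wait(x=mult.out), name="add2")
    return add2.out

cache_dir1 = Path(tempfile.mkdtemp())
cache_dir2 = Path(tempfile.mkdtemp())

# first run: results are written under cache_dir1
with Submitter(worker="cf", cache_dir=cache_dir1) as sub:
    results1 = sub(Worky(x=2, y=3))

# second run: a new cache_dir, but cache_dir1 is searched by checksum first,
# so nothing is recomputed unless rerun=True is passed to the call
with Submitter(worker="cf", cache_dir=cache_dir2, cache_locations=cache_dir1) as sub:
    results2 = sub(Worky(x=2, y=3))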
with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) assert results1.outputs.out == 8 @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) + worky2 = Worky2().split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) assert results2.outputs.out[0] == 8 assert results2.outputs.out[1] == 82 - # checking the directory from the first wf - assert wf1.output_dir.exists() - # checking directories from the second wf, only second element should be recomputed - assert not wf2.output_dir[0].exists() - assert wf2.output_dir[1].exists() - -def test_wf_nostate_cachelocations_updated(plugin, tmpdir): +def test_wf_nostate_cachelocations_updated(plugin, tmp_path): """ Two identical wfs with provided cache_dir; - the second wf has cache_locations in init, + the second worky has cache_locations in init, that is later overwritten in Submitter.__call__; the cache_locations from call doesn't exist so the second task should run again """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir1_empty = tmpdir.mkdir("test_wf_cache3_empty") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir1_empty = tmp_path.mkdir("test_wf_cache3_empty") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3312,20 +3034,20 @@ def Workflow1(x, y): assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) t0 = time.time() # changing cache_locations to non-existing dir with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1_empty ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3338,49 +3060,49 @@ def Workflow2(x, y): assert t1 > 2 assert t2 > 2 - # checking if both wf run + # checking if both worky run assert results1.output_dir != results2.output_dir @pytest.mark.flaky(reruns=3) -def test_wf_nostate_cachelocations_recompute(plugin, tmpdir): +def test_wf_nostate_cachelocations_recompute(plugin, tmp_path): """ Two wfs with the same inputs but slightly different graph; - the second wf should recompute the 
results, - but the second node should use the results from the first wf (has the same input) + the second worky should recompute the results, + but the second node should use the results from the first worky (has the same input) """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) assert 8 == results1.outputs.out @workflow.define - def Workflow2(x, y): + def Worky2(x, y): # different argument assignment - mult = workflow.add(Multiply(x=y, y=x)) - add2 = workflow.add(Add2(x=mult.out)) + mult = workflow.add(Multiply(x=y, y=x), name="mult") + add2 = workflow.add(Add2(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=2, y=3) + worky2 = Worky2(x=2, y=3) with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) @@ -3389,31 +3111,33 @@ def Workflow2(x, y): # checking if both dir exists assert results1.output_dir != results2.output_dir - # the second wf should have only one task run + # the second worky should have only one task run assert len(list(Path(cache_dir1).glob("F*"))) == 2 assert len(list(Path(cache_dir2).glob("F*"))) == 1 @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations(plugin, tmpdir): +def test_wf_ndstate_cachelocations(plugin, tmp_path): """ Two wfs with identical inputs and node states; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=[2, 20], y=[3, 4]) + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3421,19 +3145,21 @@ def Workflow1(x, y): assert results1.outputs.out == [8, 82] @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=[2, 20], y=[3, 4]) + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter( 
worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3446,35 +3172,30 @@ def Workflow2(x, y): assert t1 > 2 assert t2 < max(1, t1 - 1) - # checking all directories - assert wf1.output_dir.exists() - - # checking if the second wf didn't run again - # checking all directories - assert not wf2.output_dir.exists() - @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): +def test_wf_ndstate_cachelocations_forcererun(plugin, tmp_path): """ Two wfs with identical inputs and node states; - the second wf has cache_locations, + the second worky has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=[2, 20], y=[3, 4]) + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3482,17 +3203,19 @@ def Workflow1(x, y): assert results1.outputs.out == [8, 82] @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=[2, 20], y=[3, 4]) + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir2) as sub: - results2 = sub(wf2, rerun=True) + results2 = sub(worky2, rerun=True) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3506,32 +3229,32 @@ def Workflow2(x, y): assert t2 > 2 # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf run again - assert wf2.output_dir.exists() + # checking if the second worky run again @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): +def test_wf_ndstate_cachelocations_updatespl(plugin, tmp_path): """ Two wfs with identical inputs and node state (that is set after adding the node!); - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return 
add2.out - wf1 = Workflow1(x=[2, 20], y=[3, 4]) + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3539,19 +3262,21 @@ def Workflow1(x, y): assert results1.outputs.out == [8, 82] @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=[2, 20], y=[3, 4]) + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3565,33 +3290,33 @@ def Workflow2(x, y): assert t2 < max(1, t1 - 1) # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf didn't run again + # checking if the second worky didn't run again # checking all directories - assert not wf2.output_dir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): +def test_wf_ndstate_cachelocations_recompute(plugin, tmp_path): """ Two wfs (with nodes with states) with provided cache_dir; - the second wf has cache_locations and should not recompute the results + the second worky has cache_locations and should not recompute the results """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") - cache_dir2 = tmpdir.mkdir("test_wf_cache4") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") + cache_dir2 = tmp_path.mkdir("test_wf_cache4") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply().split(splitter=("x", "y"), x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add( + Multiply().split(splitter=("x", "y"), x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=[2, 20], y=[3, 4]) + worky1 = Worky1(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3599,19 +3324,21 @@ def Workflow1(x, y): assert results1.outputs.out == [8, 82] @workflow.define - def Workflow2(x, y): + def Worky2(x, y): - mult = workflow.add(Multiply().split(splitter=["x", "y"], x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + mult = workflow.add( + Multiply().split(splitter=["x", "y"], x=x, y=y), name="mult" + ) + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf2 = Workflow2(x=[2, 20], y=[3, 4]) + worky2 = Worky2(x=[2, 20], y=[3, 4]) t0 = time.time() with Submitter( worker=plugin, cache_dir=cache_dir2, cache_locations=cache_dir1 ) as sub: - results2 = sub(wf2) + results2 = sub(worky2) assert not results2.errored, "\n".join(results2.errors["error message"]) t2 = time.time() - t0 @@ -3625,54 +3352,51 @@ def Workflow2(x, y): assert t2 > 2 # checking all directories - assert wf1.output_dir.exists() - # checking if the second wf didn't run again + # checking if the second worky didn't run again # checking all directories - assert 
wf2.output_dir.exists() @pytest.mark.flaky(reruns=3) -def test_wf_nostate_runtwice_usecache(plugin, tmpdir): +def test_wf_nostate_runtwice_usecache(plugin, tmp_path): """ running workflow (without state) twice, the second run should use the results from the first one """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1(x=2, y=3) + worky1 = Worky1(x=2, y=3) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 assert 8 == results1.outputs.out # checkoing output_dir after the first run - assert wf1.output_dir.exists() # saving the content of the cache dit after the first run - cache_dir_content = os.listdir(wf1.cache_dir) + cache_dir_content = os.listdir(worky1.cache_dir) # running workflow the second time t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t2 = time.time() - t0 assert 8 == results1.outputs.out # checking if no new directory is created - assert cache_dir_content == os.listdir(wf1.cache_dir) + assert cache_dir_content == os.listdir(worky1.cache_dir) # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": @@ -3681,24 +3405,24 @@ def Workflow1(x, y): assert t2 < max(1, t1 - 1) -def test_wf_state_runtwice_usecache(plugin, tmpdir): +def test_wf_state_runtwice_usecache(plugin, tmp_path): """ running workflow with a state twice, the second run should use the results from the first one """ - cache_dir1 = tmpdir.mkdir("test_wf_cache3") + cache_dir1 = tmp_path.mkdir("test_wf_cache3") @workflow.define - def Workflow1(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add2 = workflow.add(Add2Wait(x=mult.out)) + def Worky1(x, y): + mult = workflow.add(Multiply(x=x, y=y), name="mult") + add2 = workflow.add(Add2Wait(x=mult.out), name="add2") return add2.out - wf1 = Workflow1().split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) + worky1 = Worky1().split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t1 = time.time() - t0 @@ -3707,15 +3431,15 @@ def Workflow1(x, y): assert 602 == results1.outputs.out[1] # checkoing output_dir after the first run - assert [odir.exists() for odir in wf1.output_dir] + assert [odir.exists() for odir in worky1.output_dir] # saving the content of the cache dit after the first run - cache_dir_content = os.listdir(wf1.cache_dir) + cache_dir_content = os.listdir(worky1.cache_dir) # running workflow the second time t0 = time.time() with Submitter(worker=plugin, cache_dir=cache_dir1) as sub: - results1 = sub(wf1) + results1 = sub(worky1) assert not results1.errored, "\n".join(results1.errors["error message"]) t2 = time.time() - t0 @@ -3723,7 +3447,7 @@ def Workflow1(x, y): assert 8 == results1.outputs.out[0] assert 602 == results1.outputs.out[1] # checking if no new 
directory is created - assert cache_dir_content == os.listdir(wf1.cache_dir) + assert cache_dir_content == os.listdir(worky1.cache_dir) # for win and dask/slurm the time for dir creation etc. might take much longer if not sys.platform.startswith("win") and plugin == "cf": # checking the execution time @@ -3734,49 +3458,49 @@ def Workflow1(x, y): @pytest.fixture def create_tasks(): @workflow.define - def Workflow(x): + def Worky(x): t1 = workflow.add(Add2(x=x), name="t1") t2 = workflow.add(Multiply(x=t1.out, y=2), name="t2") return t2.out - wf = Workflow(x=1) - workflow_obj = pydra.engine.core.Workflow.construct(wf) + worky = Worky(x=1) + workflow_obj = pydra.engine.core.Workflow.construct(worky) t1 = workflow_obj["t1"] t2 = workflow_obj["t2"] - return wf, t1, t2 + return worky, t1, t2 -def test_cache_propagation1(tmpdir, create_tasks): +def test_cache_propagation1(tmp_path, create_tasks): """No cache set, all independent""" - wf, t1, t2 = create_tasks - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir - wf.cache_dir = (tmpdir / "shared").strpath - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir + worky, t1, t2 = create_tasks + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir + worky.cache_dir = (tmp_path / "shared").strpath + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir -def test_cache_propagation2(tmpdir, create_tasks): +def test_cache_propagation2(tmp_path, create_tasks): """Task explicitly states no inheriting""" - wf, t1, t2 = create_tasks - wf.cache_dir = (tmpdir / "shared").strpath + worky, t1, t2 = create_tasks + worky.cache_dir = (tmp_path / "shared").strpath t2.allow_cache_override = False - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir != t2.cache_dir + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir != t2.cache_dir -def test_cache_propagation3(tmpdir, create_tasks): +def test_cache_propagation3(tmp_path, create_tasks): """Shared cache_dir with state""" - wf, t1, t2 = create_tasks - wf = wf.split("x", x=[1, 2]) - wf.cache_dir = (tmpdir / "shared").strpath - wf(plugin="cf") - assert wf.cache_dir == t1.cache_dir == t2.cache_dir + worky, t1, t2 = create_tasks + worky = wf["split"]("x", x=[1, 2]) + worky.cache_dir = (tmp_path / "shared").strpath + worky(plugin="cf") + assert wf["cache_dir"] == t1.cache_dir == t2.cache_dir -def test_workflow_combine1(tmpdir): +def test_workflow_combine1(tmp_path): @workflow.define(outputs=["out_pow", "out_iden1", "out_iden2"]) - def Workflow1(a, b): + def Worky1(a, b): power = workflow.add(Power().split(["a", "b"], a=a, b=b)) identity1 = workflow.add( Identity(x=power.out).combine("power.a"), name="identity1" @@ -3786,362 +3510,195 @@ def Workflow1(a, b): ) return power.out, identity1.out, identity2.out - wf1 = Workflow1(a=[1, 2], b=[2, 3]) - outputs = wf1() + worky1 = Worky1(a=[1, 2], b=[2, 3]) + outputs = worky1() assert outputs.out_pow == [1, 1, 4, 8] assert outputs.out_iden1 == [[1, 4], [1, 8]] assert outputs.out_iden2 == [[1, 4], [1, 8]] -def test_workflow_combine2(tmpdir): +def test_workflow_combine2(tmp_path): @workflow.define(outputs=["out_pow", "out_iden"]) - def Workflow1(a, b): + def Worky1(a, b): power = workflow.add(Power().split(["a", "b"], a=a, b=b).combine("a")) identity = workflow.add(Identity(x=power.out).combine("power.b")) return power.out, identity.out - wf1 = Workflow1(a=[1, 2], b=[2, 3]) - outputs = wf1(cache_dir=tmpdir) + worky1 = Worky1(a=[1, 2], b=[2, 3]) + outputs = worky1(cache_dir=tmp_path) assert 
outputs.out_pow == [[1, 4], [1, 8]] assert outputs.out_iden == [[1, 4], [1, 8]] -# g.all to collect all of the results and let PythonTask deal with it - - -def test_wf_lzoutall_1(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_sub2_res function - using.all syntax - """ - - @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) - return add_sub.out_add - - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert 8 == results.outputs.out - - -def test_wf_lzoutall_1a(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - using.all syntax in the node connections and for wf output - """ - - @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply(x=x, y=y)) - add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) - return add_sub.all_ # out_all - - wf = Workflow(x=2, y=3) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out_all == {"out_add": 8, "out_sub": 4} - - -def test_wf_lzoutall_st_1(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - using.all syntax - """ - - @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) - add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) - return add_sub.out_add # out_add - - wf = Workflow(x=[2, 20], y=[3, 30]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out_add == [8, 62, 62, 602] - - -def test_wf_lzoutall_st_1a(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - using.all syntax - """ - - @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y)) - add_sub = workflow.add(Add2Sub2Res(res=mult.all_)) - return add_sub.all_ # out_all - - wf = Workflow(x=[2, 20], y=[3, 30]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out_all == [ - {"out_add": 8, "out_sub": 4}, - {"out_add": 62, "out_sub": 58}, - {"out_add": 62, "out_sub": 58}, - {"out_add": 602, "out_sub": 598}, - ] - - -def test_wf_lzoutall_st_2(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - using.all syntax - """ - - @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) - add_sub = workflow.add(Add2Sub2ResList(res=mult.all_)) - return add_sub.out_add # out_add - - wf = Workflow(x=[2, 20], y=[3, 30]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out_add[0] == [8, 62] - assert results.outputs.out_add[1] == [62, 602] - - -@pytest.mark.xfail( - condition=bool(shutil.which("sbatch")), # using SLURM - reason=( - "Not passing on SLURM image for some reason, hoping upgrade of image/Python " - "version fixes it" - ), -) -def 
test_wf_lzoutall_st_2a(plugin, tmpdir): - """workflow with 2 tasks, no splitter - passing entire result object to add2_res function - using.all syntax - """ +def test_wf_resultfile_1(plugin, tmp_path): + """workflow with a file in the result, file should be copied to the worky dir""" @workflow.define - def Workflow(x, y): - mult = workflow.add(Multiply().split(["x", "y"], x=x, y=y).combine("x")) - add_sub = workflow.add(Add2Sub2ResList(res=mult.all_)) - return add_sub.all_ # out_all - - wf = Workflow(x=[2, 20], y=[3, 30]) - - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) - - assert results.outputs.out_all == [ - {"out_add": [8, 62], "out_sub": [4, 58]}, - {"out_add": [62, 602], "out_sub": [58, 598]}, - ] - - -# workflows that have files in the result, the files should be copied to the wf dir - - -def test_wf_resultfile_1(plugin, tmpdir): - """workflow with a file in the result, file should be copied to the wf dir""" - - @workflow.define - def Workflow(x): + def Worky(x): writefile = workflow.add(FunWriteFile(filename=x)) return writefile.out # wf_out - wf = Workflow(x="file_1.txt") - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, "\n".join(results.errors["error message"]) + worky = Worky(x="file_1.txt") + outputs = worky(worker=plugin, cache_dir=tmp_path) - # checking if the file exists and if it is in the Workflow directory - wf_out = results.outputs.wf_out.fspath + # checking if the file exists and if it is in the Worky directory + wf_out = outputs.wf_out.fspath wf_out.exists() - assert wf_out == wf.output_dir / "file_1.txt" + assert wf_out == wf["output_dir"] / "file_1.txt" -def test_wf_resultfile_2(plugin, tmpdir): - """workflow with a list of files in the wf result, - all files should be copied to the wf dir +def test_wf_resultfile_2(plugin, tmp_path): + """workflow with a list of files in the worky result, + all files should be copied to the worky dir """ @workflow.define - def Workflow(x): + def Worky(x): writefile = workflow.add(FunWriteFileList(filename_list=x)) return writefile.out # wf_out file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf = Workflow(x=file_list) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) + worky = Worky(x=file_list) + outputs = worky(worker=plugin, cache_dir=tmp_path) - assert not results.errored, "\n".join(results.errors["error message"]) - - # checking if the file exists and if it is in the Workflow directory - for ii, file in enumerate(results.outputs.wf_out): + # checking if the file exists and if it is in the Worky directory + for ii, file in enumerate(outputs.wf_out): assert file.fspath.exists() - assert file.fspath == wf.output_dir / file_list[ii] + assert file.fspath == wf["output_dir"] / file_list[ii] -def test_wf_resultfile_3(plugin, tmpdir): - """workflow with a dictionaries of files in the wf result, - all files should be copied to the wf dir +def test_wf_resultfile_3(plugin, tmp_path): + """workflow with a dictionaries of files in the worky result, + all files should be copied to the worky dir """ @workflow.define - def Workflow(x): + def Worky(x): writefile = workflow.add(FunWriteFileList2Dict(filename_list=x)) return writefile.out # wf_out file_list = ["file_1.txt", "file_2.txt", "file_3.txt"] - wf = Workflow(x=file_list) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - results = sub(wf) - - assert not results.errored, 
"\n".join(results.errors["error message"]) + worky = Worky(x=file_list) + outputs = worky(worker=plugin, cache_dir=tmp_path) - # checking if the file exists and if it is in the Workflow directory - for key, val in results.outputs.wf_out.items(): + # checking if the file exists and if it is in the Worky directory + for key, val in outputs.wf_out.items(): if key == "random_int": assert val == 20 else: assert val.fspath.exists() ii = int(key.split("_")[1]) - assert val.fspath == wf.output_dir / file_list[ii] + assert val.fspath == wf["output_dir"] / file_list[ii] -def test_wf_upstream_error1(plugin, tmpdir): +def test_wf_upstream_error1(plugin, tmp_path): """workflow with two tasks, task2 dependent on an task1 which raised an error""" @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error2(plugin, tmpdir): +def test_wf_upstream_error2(plugin, tmp_path): """task2 dependent on task1, task1 errors, workflow-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out - wf = Workflow().split( + worky = Worky().split( "x", x=[1, "hi"] ) # workflow-level split TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) @pytest.mark.flaky(reruns=2) # when slurm -def test_wf_upstream_error3(plugin, tmpdir): +def test_wf_upstream_error3(plugin, tmp_path): """task2 dependent on task1, task1 errors, task-level split on task 1 goal - workflow finish running, one output errors but the other doesn't """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType().split("a", a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") return addvar2.out - wf = Workflow(x=[1, "hi"]) # TypeError for adding str and int + worky = Worky(x=[1, "hi"]) # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error4(plugin, tmpdir): +def test_wf_upstream_error4(plugin, tmp_path): """workflow with one task, which raises an error""" @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) return addvar1.out - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, 
cache_dir=tmp_path) assert "raised an error" in str(excinfo.value) assert "addvar1" in str(excinfo.value) -def test_wf_upstream_error5(plugin, tmpdir): +def test_wf_upstream_error5(plugin, tmp_path): """nested workflow with one task, which raises an error""" @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x)) return addvar1.out # wf_out @workflow.define def WfMain(x): - wf = workflow.add(Workflow(x=x)) - return wf.out + worky = workflow.add(Worky(x=x)) + return wf["out"] wf_main = WfMain(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error6(plugin, tmpdir): +def test_wf_upstream_error6(plugin, tmp_path): """nested workflow with two tasks, the first one raises an error""" @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") @@ -4149,115 +3706,111 @@ def Workflow(x): @workflow.define def WfMain(x): - wf = workflow.add(Workflow(x=x)) - return wf.out + worky = workflow.add(Worky(x=x)) + return wf["out"] wf_main = WfMain(x="hi") # TypeError for adding str and int with pytest.raises(Exception) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: sub(wf_main) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) -def test_wf_upstream_error7(plugin, tmpdir): +def test_wf_upstream_error7(plugin, tmp_path): """ workflow with three sequential tasks, the first task raises an error the last task is set as the workflow output """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar3.out - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"] -def test_wf_upstream_error7a(plugin, tmpdir): +def test_wf_upstream_error7a(plugin, tmp_path): """ workflow with three sequential tasks, the first task raises an error the second task is set as the workflow output """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar3.out - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + 
worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"] -def test_wf_upstream_error7b(plugin, tmpdir): +def test_wf_upstream_error7b(plugin, tmp_path): """ workflow with three sequential tasks, the first task raises an error the second and the third tasks are set as the workflow output """ @workflow.define(outputs=["out1", "out2"]) - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") addvar3 = workflow.add(FunAddVarDefaultNoType(a=addvar2.out), name="addvar3") return addvar2.out, addvar3.out # - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addvar3._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addvar3"]._errored == ["addvar1"] -def test_wf_upstream_error8(plugin, tmpdir): +def test_wf_upstream_error8(plugin, tmp_path): """workflow with three tasks, the first one raises an error, so 2 others are removed""" @workflow.define(outputs=["out1", "out2"]) - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") addvar2 = workflow.add(FunAddVarDefaultNoType(a=addvar1.out), name="addvar2") addtwo = workflow.add(FunAddTwo(a=addvar1.out)) return addvar2.out, addtwo.out # - wf = Workflow(x="hi") # TypeError for adding str and int + worky = Worky(x="hi") # TypeError for adding str and int with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "addvar1" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.addvar1._errored is True - assert wf.addvar2._errored == wf.addtwo._errored == ["addvar1"] + assert wf["addvar1"]._errored is True + assert wf["addvar2"]._errored == wf["addtwo"]._errored == ["addvar1"] -def test_wf_upstream_error9(plugin, tmpdir): +def test_wf_upstream_error9(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine @@ -4265,7 +3818,7 @@ def test_wf_upstream_error9(plugin, tmpdir): """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") @@ -4275,17 +3828,16 @@ def Workflow(x): workflow.add(FunAddVarDefaultNoType(a=addtwo.out)) return follow_err.out # out1 - wf = Workflow(x=2) + worky = Worky(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + assert wf["err"]._errored is True + assert wf["follow_err"]._errored == 
["err"] -def test_wf_upstream_error9a(plugin, tmpdir): +def test_wf_upstream_error9a(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine @@ -4294,7 +3846,7 @@ def test_wf_upstream_error9a(plugin, tmpdir): """ @workflow.define - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefault(a=x), name="addvar1") err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") @@ -4304,15 +3856,15 @@ def Workflow(x): addvar2 = workflow.add(FunAddVarDefault(a=addtwo.out), name="addvar2") return addvar2.out # out1 # , ("out2", addtwo.out)]) - wf = Workflow(x=2) + worky = Worky(x=2) - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + with Submitter(worker=plugin, cache_dir=tmp_path) as sub: + sub(worky) + assert wf["err"]._errored is True + assert wf["follow_err"]._errored == ["err"] -def test_wf_upstream_error9b(plugin, tmpdir): +def test_wf_upstream_error9b(plugin, tmp_path): """ workflow with five tasks with two "branches", one branch has an error, the second is fine @@ -4320,7 +3872,7 @@ def test_wf_upstream_error9b(plugin, tmpdir): """ @workflow.define(outputs=["out1", "out2"]) - def Workflow(x): + def Worky(x): addvar1 = workflow.add(FunAddVarDefaultNoType(a=x), name="addvar1") err = workflow.add(FunAddVarNoType(a=addvar1.out, b="hi"), name="err") @@ -4330,35 +3882,34 @@ def Workflow(x): addvar2 = workflow.add(FunAddVarDefaultNoType(a=addtwo.out), name="addvar2") return follow_err.out, addvar2.out - wf = Workflow(x=2) + worky = Worky(x=2) with pytest.raises(ValueError) as excinfo: - with Submitter(worker=plugin, cache_dir=tmpdir) as sub: - sub(wf) + worky(worker=plugin, cache_dir=tmp_path) assert "err" in str(excinfo.value) assert "raised an error" in str(excinfo.value) - assert wf.err._errored is True - assert wf.follow_err._errored == ["err"] + assert wf["err"]._errored is True + assert wf["follow_err"]._errored == ["err"] -def exporting_graphs(wf, name): +def exporting_graphs(worky, name, out_dir): """helper function to run dot to create png/pdf files from dotfiles""" # exporting the simple graph - dotfile_pr, formatted_dot = wf.create_dotfile(export=True, name=name) + dotfile_pr, formatted_dot = plot_workflow(worky, out_dir, export=True, name=name) assert len(formatted_dot) == 1 assert formatted_dot[0] == dotfile_pr.with_suffix(".png") assert formatted_dot[0].exists() print("\n png of a simple graph in: ", formatted_dot[0]) # exporting nested graph - dotfile_pr, formatted_dot = wf.create_dotfile( - type="nested", export=["pdf", "png"], name=f"{name}_nest" + dotfile_pr, formatted_dot = plot_workflow( + worky, out_dir, type="nested", export=["pdf", "png"], name=f"{name}_nest" ) assert len(formatted_dot) == 2 assert formatted_dot[0] == dotfile_pr.with_suffix(".pdf") assert formatted_dot[0].exists() print("\n pdf of the nested graph in: ", formatted_dot[0]) # detailed graph - dotfile_pr, formatted_dot = wf.create_dotfile( - type="detailed", export="pdf", name=f"{name}_det" + dotfile_pr, formatted_dot = plot_workflow( + worky, out_dir, type="detailed", export="pdf", name=f"{name}_det" ) assert len(formatted_dot) == 1 assert formatted_dot[0] == dotfile_pr.with_suffix(".pdf") @@ -4367,36 +3918,64 @@ def exporting_graphs(wf, name): @pytest.mark.parametrize("splitter", [None, "x"]) -def test_graph_1(tmpdir, splitter): - """creating a set of graphs, wf with two nodes""" +def test_graph_simple(tmp_path, splitter): + """creating 
a set of graphs, worky with two nodes""" @workflow.define - def Workflow(x, y): + def Worky(x=1, y=2): mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") workflow.add(Multiply(x=x, y=x), name="mult_2") add2 = workflow.add(Add2(x=mult_1.out), name="add2") return add2.out - wf = Workflow().split(splitter, x=[1, 2]) + worky = Worky().split(splitter, x=[1, 2]) # simple graph - dotfile_s = wf.create_dotfile() + dotfile_s = plot_workflow(worky, tmp_path, name="simple") dotstr_s_lines = dotfile_s.read_text().split("\n") assert "mult_1" in dotstr_s_lines assert "mult_2" in dotstr_s_lines assert "add2" in dotstr_s_lines assert "mult_1 -> add2" in dotstr_s_lines + +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_nested(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) + # nested graph (should have the same elements) - dotfile_n = wf.create_dotfile(type="nested") + dotfile_n = plot_workflow(worky, tmp_path, type="nested", name="nested") dotstr_n_lines = dotfile_n.read_text().split("\n") assert "mult_1" in dotstr_n_lines assert "mult_2" in dotstr_n_lines assert "add2" in dotstr_n_lines assert "mult_1 -> add2" in dotstr_n_lines + +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_detailed(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) + # detailed graph - dotfile_d = wf.create_dotfile(type="detailed") + dotfile_d = plot_workflow(worky, tmp_path, type="detailed", name="detailed") dotstr_d_lines = dotfile_d.read_text().split("\n") assert ( 'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];' @@ -4404,28 +3983,41 @@ def Workflow(x, y): ) assert "struct_mult_1:out -> struct_add2:x;" in dotstr_d_lines - # exporting graphs if dot available - if DOT_FLAG: - name = f"graph_{sys._getframe().f_code.co_name}" - exporting_graphs(wf=wf, name=name) + +@pytest.mark.skipif(not DOT_FLAG, reason="dot not available") +@pytest.mark.parametrize("splitter", [None, "x"]) +def test_graph_export_dot(tmp_path, splitter): + """creating a set of graphs, worky with two nodes""" + + @workflow.define + def Worky(x=1, y=2): + mult_1 = workflow.add(Multiply(x=x, y=y), name="mult_1") + workflow.add(Multiply(x=x, y=x), name="mult_2") + add2 = workflow.add(Add2(x=mult_1.out), name="add2") + return add2.out + + worky = Worky().split(splitter, x=[1, 2]) + + name = f"graph_{sys._getframe().f_code.co_name}" + exporting_graphs(worky=worky, name=name, out_dir=tmp_path) -def test_graph_1st(tmpdir): - """creating a set of graphs, wf with two nodes +def test_graph_1st(tmp_path): + """creating a set of graphs, worky with two nodes some nodes have splitters, should be marked with blue color """ @workflow.define - def Workflow(x, y): + def Worky(x, y): mult_1 = workflow.add(Multiply(y=y).split("x", x=x), name="mult_1") workflow.add(Multiply(x=x, y=x), name="mult_2") add2 = workflow.add(Add2(x=mult_1.out), name="add2") return add2.out - wf = Workflow(x=[1, 2], y=2) + worky = Worky(x=[1, 2], y=2) # simple graph - 
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult_1 [color=blue]" in dotstr_s_lines
     assert "mult_2" in dotstr_s_lines
@@ -4433,7 +4025,7 @@ def Workflow(x, y):
     assert "mult_1 -> add2 [color=blue]" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     assert "mult_1 [color=blue]" in dotstr_n_lines
     assert "mult_2" in dotstr_n_lines
@@ -4441,7 +4033,7 @@ def Workflow(x, y):
     assert "mult_1 -> add2 [color=blue]" in dotstr_n_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4451,25 +4043,25 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_1st_cmb(tmpdir):
-    """creating a set of graphs, wf with three nodes
+def test_graph_1st_cmb(tmp_path):
+    """creating a set of graphs, worky with three nodes
     the first one has a splitter, the second has a combiner, so the third one is stateless
     first two nodes should be blue and the arrow between them should be blue
     """

     @workflow.define
-    def Workflow(x, y):
+    def Worky(x, y):
         mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult")
         add2 = workflow.add(Add2(x=mult.out).combine("mult.x"), name="add2")
         sum = workflow.add(ListSum(x=add2.out), name="sum")
         return sum.out

-    wf = Workflow(x=[1, 2], y=2)
+    worky = Worky(x=[1, 2], y=2)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult [color=blue]" in dotstr_s_lines
     assert "add2 [color=blue]" in dotstr_s_lines
@@ -4478,7 +4070,7 @@ def Workflow(x, y):
     assert "add2 -> sum" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     assert "mult [color=blue]" in dotstr_n_lines
     assert "add2 [color=blue]" in dotstr_n_lines
@@ -4487,7 +4079,7 @@ def Workflow(x, y):
     assert "add2 -> sum" in dotstr_n_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4497,11 +4089,11 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_2(tmpdir):
-    """creating a graph, wf with one workflow as a node"""
+def test_graph_2(tmp_path):
+    """creating a graph, worky with one workflow as a node"""

     @workflow.define
     def Wfnd(x):
@@ -4509,25 +4101,25 @@ def Wfnd(x):
         return add2.out

     @workflow.define
-    def Workflow(x):
+    def Worky(x):
         wfnd = workflow.add(Wfnd(x=x), name="wfnd")
         return wfnd.out

-    wf = Workflow(x=2)
+    worky = Worky(x=2)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "wfnd [shape=box]" in dotstr_s_lines

     # nested graph
-    dotfile = wf.create_dotfile(type="nested")
+    dotfile = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_lines = dotfile.read_text().split("\n")
     assert "subgraph cluster_wfnd {" in dotstr_lines
     assert "add2" in dotstr_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x}}"];' in dotstr_d_lines
@@ -4535,11 +4127,11 @@ def Workflow(x):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_2st(tmpdir):
-    """creating a set of graphs, wf with one workflow as a node
+def test_graph_2st(tmp_path):
+    """creating a set of graphs, worky with one workflow as a node
     the inner workflow has a state, so should be blue
     """

@@ -4549,26 +4141,26 @@ def Wfnd(x):
         return add2.out

     @workflow.define
-    def Workflow(x):
+    def Worky(x):
         wfnd = workflow.add(Wfnd(x=x).split("x", x=x), name="wfnd")
         return wfnd.out

-    wf = Workflow(x=[1, 2])
+    worky = Worky(x=[1, 2])

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "wfnd [shape=box, color=blue]" in dotstr_s_lines

     # nested graph
-    dotfile_s = wf.create_dotfile(type="nested")
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "subgraph cluster_wfnd {" in dotstr_s_lines
     assert "color=blue" in dotstr_s_lines
     assert "add2" in dotstr_s_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x}}"];' in dotstr_d_lines
@@ -4577,11 +4169,11 @@ def Workflow(x):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_3(tmpdir):
-    """creating a set of graphs, wf with two nodes (one node is a workflow)"""
+def test_graph_3(tmp_path):
+    """creating a set of graphs, worky with two nodes (one node is a workflow)"""

     @workflow.define
     def Wfnd(x):
@@ -4589,29 +4181,29 @@ def Wfnd(x):
         return add2.out

     @workflow.define
-    def Workflow(x, y):
+    def Worky(x, y=1):
         mult = workflow.add(Multiply(x=x, y=y), name="mult")
         wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd")
         return wfnd.out

-    wf = Workflow(x=2)
+    worky = Worky(x=2)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult" in dotstr_s_lines
     assert "wfnd [shape=box]" in dotstr_s_lines
     assert "mult -> wfnd" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     assert "mult" in dotstr_n_lines
     assert "subgraph cluster_wfnd {" in dotstr_n_lines
     assert "add2" in dotstr_n_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4621,11 +4213,11 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_3st(tmpdir):
-    """creating a set of graphs, wf with two nodes (one node is a workflow)
+def test_graph_3st(tmp_path):
+    """creating a set of graphs, worky with two nodes (one node is a workflow)
     the first node has a state and it should be passed to the second node
     (blue node and a wfasnd, and blue arrow from the node to the wfasnd)
     """

@@ -4636,29 +4228,29 @@ def Wfnd(x):
         return add2.out

     @workflow.define
-    def Workflow(x, y):
+    def Worky(x, y):
         mult = workflow.add(Multiply(y=y).split("x", x=x), name="mult")
         wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd")
         return wfnd.out

-    wf = Workflow(x=[1, 2], y=2)
+    worky = Worky(x=[1, 2], y=2)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult [color=blue]" in dotstr_s_lines
     assert "wfnd [shape=box, color=blue]" in dotstr_s_lines
     assert "mult -> wfnd [color=blue]" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     assert "mult [color=blue]" in dotstr_n_lines
     assert "subgraph cluster_wfnd {" in dotstr_n_lines
     assert "add2" in dotstr_n_lines

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4668,11 +4260,11 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_4(tmpdir):
-    """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes
+def test_graph_4(tmp_path):
+    """creating a set of graphs, worky with two nodes (one node is a workflow with two nodes
     inside). Connection from the node to the inner workflow.
     """

@@ -4683,22 +4275,22 @@ def Wfnd(x):
         return add2_b.out

     @workflow.define
-    def Workflow(x, y):
+    def Worky(x, y):
         mult = workflow.add(Multiply(x=x, y=y), name="mult")
         wfnd = workflow.add(Wfnd(x=mult.out), name="wfnd")
         return wfnd.out

-    wf = Workflow(x=2, y=3)
+    worky = Worky(x=2, y=3)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult" in dotstr_s_lines
     assert "wfnd [shape=box]" in dotstr_s_lines
     assert "mult -> wfnd" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     for el in ["mult", "add2_a", "add2_b"]:
         assert el in dotstr_n_lines
     assert "subgraph cluster_wfnd {" in dotstr_n_lines
     assert "mult -> add2_a [lhead=cluster_wfnd]"

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4717,11 +4309,11 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


-def test_graph_5(tmpdir):
-    """creating a set of graphs, wf with two nodes (one node is a workflow with two nodes
+def test_graph_5(tmp_path):
+    """creating a set of graphs, worky with two nodes (one node is a workflow with two nodes
     inside). Connection from the inner workflow to the node.
     """

@@ -4732,22 +4324,22 @@ def Wfnd(x):
         return add2_b.out

     @workflow.define
-    def Workflow(x, y):
+    def Worky(x, y):
         wfnd = workflow.add(Wfnd(x=x), name="wfnd")
         mult = workflow.add(Multiply(x=wfnd.out, y=y), name="mult")
         return mult.out

-    wf = Workflow(x=2, y=3)
+    worky = Worky(x=2, y=3)

     # simple graph
-    dotfile_s = wf.create_dotfile()
+    dotfile_s = plot_workflow(worky, out_dir=tmp_path)
     dotstr_s_lines = dotfile_s.read_text().split("\n")
     assert "mult" in dotstr_s_lines
     assert "wfnd [shape=box]" in dotstr_s_lines
     assert "wfnd -> mult" in dotstr_s_lines

     # nested graph
-    dotfile_n = wf.create_dotfile(type="nested")
+    dotfile_n = plot_workflow(worky, out_dir=tmp_path, type="nested")
     dotstr_n_lines = dotfile_n.read_text().split("\n")
     for el in ["mult", "add2_a", "add2_b"]:
         assert el in dotstr_n_lines
     assert "subgraph cluster_wfnd {" in dotstr_n_lines
     assert "add2_b -> mult [ltail=cluster_wfnd]"

     # detailed graph
-    dotfile_d = wf.create_dotfile(type="detailed")
+    dotfile_d = plot_workflow(worky, out_dir=tmp_path, type="detailed")
     dotstr_d_lines = dotfile_d.read_text().split("\n")
     assert (
         'struct_wf [color=red, label="{WORKFLOW INPUT: | { x | y}}"];'
@@ -4766,11 +4358,11 @@ def Workflow(x, y):

     if DOT_FLAG:
         name = f"graph_{sys._getframe().f_code.co_name}"
-        exporting_graphs(wf=wf, name=name)
+        exporting_graphs(worky=worky, name=name, out_dir=tmp_path)


 @pytest.mark.timeout(20)
-def test_duplicate_input_on_split_wf(tmpdir):
+def test_duplicate_input_on_split_wf(tmp_path):
     """checking if the workflow gets stuck if it has to run two tasks with equal checksum;
     This can occur when splitting on a list containing duplicate values.
     """
@@ -4781,22 +4373,19 @@ def printer(a):
         return a

     @workflow.define
-    def Workflow(text):
+    def Worky(text):
         printer1 = workflow.add(printer(a=text))
         return printer1.out  # out1

-    wf = Workflow().split(text=text)
+    worky = Worky().split(text=text)

-    with Submitter(worker="cf", n_procs=6) as sub:
-        results = sub(wf)
+    outputs = worky(worker="cf", n_procs=6)

-    assert not results.errored, "\n".join(results.errors["error message"])
-
-    assert results.output.out1[0] == "test" and results.output.out1[0] == "test"
+    assert outputs.out1[0] == "test" and outputs.out1[0] == "test"


 @pytest.mark.timeout(40)
-def test_inner_outer_wf_duplicate(tmpdir):
+def test_inner_outer_wf_duplicate(tmp_path):
     """checking if the execution gets stuck if there is an inner and outer workflows
     that run two nodes with the exact same inputs.
     """
@@ -4815,7 +4404,7 @@ def OneArgInner(start_number):
         start_number += 1
         return start_number

-    # Inner Workflow
+    # Inner Worky
     @workflow.define(outputs=["res"])
     def InnerWf(start_number1):
         inner_level1 = workflow.add(OneArgInner(start_number=start_number1))
@@ -4837,33 +4426,37 @@ def OuterWf(start_number, task_name, dummy):
     with Submitter(worker="cf") as sub:
         res = sub(test_outer)

-    assert res.output.res2[0] == 23 and res.output.res2[1] == 23
+    assert res.outputs.res2[0] == 23 and res.outputs.res2[1] == 23


-def test_rerun_errored(tmpdir, capfd):
+def test_rerun_errored(tmp_path, capfd):
     """Test rerunning a workflow containing errors.
     Only the errored tasks and workflow should be rerun"""

+    class EvenException(Exception):
+        pass
+
     @python.define
     def PassOdds(x):
         if x % 2 == 0:
             print(f"x%2 = {x % 2} (error)\n")
-            raise Exception("even error")
+            raise EvenException("even error")
         else:
             print(f"x%2 = {x % 2}\n")
             return x

     @workflow.define
-    def Workflow(x):
+    def Worky(x):
         pass_odds = workflow.add(PassOdds().split("x", x=x))
         return pass_odds.out

-    wf = Workflow(x=[1, 2, 3, 4, 5])
+    worky = Worky(x=[1, 2, 3, 4, 5])

-    with pytest.raises(Exception):
-        wf()
-    with pytest.raises(Exception):
-        wf()
+    with pytest.raises(RuntimeError):
+        # Must be cf to get the error from all tasks, otherwise will only get the first error
+        worky(worker="cf", cache_dir=tmp_path, n_procs=5)
+    with pytest.raises(RuntimeError):
+        worky(worker="cf", cache_dir=tmp_path, n_procs=5)

     out, err = capfd.readouterr()
     stdout_lines = out.splitlines()
@@ -4883,9 +4476,9 @@ def Workflow(x):
     assert errors_found == 4


-def test_wf_state_arrays():
+def test_wf_state_arrays(tmp_path, plugin):
     @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
-    def Workflow(x: ty.List[int], y: int):
+    def Worky(x: ty.List[int], y: int):
         A = workflow.add(  # Split over workflow input "x" on "scalar" input
             ListMultSum(
@@ -4893,21 +4486,21 @@ def Workflow(x: ty.List[int], y: int):
             ).split(scalar=x)
         )

-        B = workflow.add(  # Workflow is still split over "x", combined over "x" on out
+        B = workflow.add(  # Worky is still split over "x", combined over "x" on out
             ListMultSum(
                 scalar=A.sum,
                 in_list=A.products,
             ).combine("A.scalar")
         )

-        C = workflow.add(  # Workflow "
+        C = workflow.add(  # Worky "
             ListMultSum(
                 scalar=y,
                 in_list=B.sum,
             )
         )

-        D = workflow.add(  # Workflow is split again, this time over C.products
+        D = workflow.add(  # Worky is split again, this time over C.products
             ListMultSum(
                 in_list=x,
             )
@@ -4915,32 +4508,34 @@ def Workflow(x: ty.List[int], y: int):
             .split(scalar=C.products)
             .combine("scalar")
         )

-        E = workflow.add(  # Workflow is finally combined again into a single node
+        E = workflow.add(  # Worky is finally combined again into a single node
             ListMultSum(scalar=y, in_list=D.sum)
         )

         return E.sum, E.products

-    wf = Workflow(x=[1, 2, 3, 4], y=10)
+    worky = Worky(x=[1, 2, 3, 4], y=10)

-    results = wf()
-    assert results.outputs.alpha == 3000000
-    assert results.outputs.beta == [100000, 400000, 900000, 1600000]
+    outputs = worky(cache_dir=tmp_path, plugin=plugin)
+    assert outputs.alpha == 3000000
+    assert outputs.beta == [100000, 400000, 900000, 1600000]


-def test_wf_input_output_typing():
+def test_wf_input_typing_fail():
     @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
-    def MismatchInputWf(x: int, y: ty.List[int]):
+    def MismatchInputWf(x: int, y: int):
         ListMultSum(
             scalar=y,
             in_list=y,
             name="A",
         )

-    with pytest.raises(TypeError) as exc_info:
+    with pytest.raises(TypeError, match="Incorrect type for field in 'y'"):
         MismatchInputWf(x=1, y=[1, 2, 3])
-    exc_info_matches(exc_info, "Cannot coerce  into ")
+
+
+def test_wf_output_typing_fail():
     @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
     def MismatchOutputWf(x: int, y: ty.List[int]):
@@ -4955,8 +4550,10 @@ def MismatchOutputWf(x: int, y: ty.List[int]):
     with pytest.raises(TypeError, match="don't match their declared types"):
         MismatchOutputWf(x=1, y=[1, 2, 3])

+
+def test_wf_input_output_typing():
     @workflow.define(outputs={"alpha": int, "beta": ty.List[int]})
-    def Workflow(x: int, y: ty.List[int]):
+    def Worky(x: int, y: ty.List[int]):
         A = workflow.add(  # Split over workflow input "x" on "scalar" input
             ListMultSum(
                 scalar=x,
                 in_list=y,
             ),
             name="A",
         )
         return A.sum, A.products

@@ -4965,6 +4562,6 @@ def Workflow(x: int, y: ty.List[int]):
-    outputs = Workflow(x=10, y=[1, 2, 3, 4])()
+    outputs = Worky(x=10, y=[1, 2, 3, 4])()
     assert outputs.sum == 10
     assert outputs.products == [10, 20, 30, 40]